In [ ]:
# use if you have a gpu
# !pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com
# import cudf
# %load_ext cudf.pandas
In [ ]:
def classify_value(value):
for label, interval in occ_labels.items():
if interval[0] <= value <= interval[1]:
return label
occ_labels = {'Occ_Class_0': (0, 0)}
j = 1
for i in range(1, 101, 10):
occ_labels[f'Occ_Class_{j}'] = (i, i + 9)
j += 1
occ_labels
Out[ ]:
{'Occ_Class_0': (0, 0),
'Occ_Class_1': (1, 10),
'Occ_Class_2': (11, 20),
'Occ_Class_3': (21, 30),
'Occ_Class_4': (31, 40),
'Occ_Class_5': (41, 50),
'Occ_Class_6': (51, 60),
'Occ_Class_7': (61, 70),
'Occ_Class_8': (71, 80),
'Occ_Class_9': (81, 90),
'Occ_Class_10': (91, 100)}
In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import sklearn
import plotly.graph_objects as go
import plotly.express as px
from joblib import Parallel, delayed
import joblib
from collections import Counter
In [ ]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Concat.csv')
In [ ]:
df.shape # (rows, columns)
Out[ ]:
(12071, 18)
In [ ]:
df.replace('$', np.nan, inplace = True)
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df['CO (ppm)'] = df['CO (ppm)'].astype('float64')
df['NO2 (ppm)'] = df['NO2 (ppm)'].astype('float64')
df['CO2 (ppm)'] = df['CO2 (ppm)'].astype('float64')
df['Occupancy_Classified'] = df['Occupancy'].apply(classify_value).map(lambda x: int(x.split('_')[2]) - 1)
df = df.query("`Occupancy_Classified` != -1").reset_index(drop = True)
In [ ]:
df.head()
Out[ ]:
| Timestamp | CO (ppm) | NO2 (ppm) | CO2 (ppm) | TVOC (ppb) | PM1 (ug/m3) | PM2.5 (ug/m3) | PM10 (ug/m3) | Temperature (C) | Humidity (%) | Sound (dB) | Occupancy | Position | Room Condition | Room Type | Floor No. | Weather | Occupancy_Classified | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-08-16 11:16:33 | 36.3 | 0.1 | 1701.0 | 0.0 | 44.0 | 65.0 | 70.0 | 26.1 | 14.1 | 72.1 | 26.0 | middle | ac | lab | 2.0 | sunny | 2 |
| 1 | 2023-08-16 11:16:38 | 36.1 | 0.1 | 1699.0 | 0.0 | 44.0 | 64.0 | 69.0 | 26.1 | 14.1 | 70.5 | 26.0 | middle | ac | lab | 2.0 | sunny | 2 |
| 2 | 2023-08-16 11:16:44 | 36.5 | 0.1 | 1695.0 | 0.0 | 44.0 | 64.0 | 69.0 | 26.1 | 14.1 | 77.2 | 26.0 | middle | ac | lab | 2.0 | sunny | 2 |
| 3 | 2023-08-16 11:16:50 | 34.2 | 0.1 | 1690.0 | 0.0 | 43.0 | 64.0 | 68.0 | 26.1 | 14.1 | 72.2 | 26.0 | middle | ac | lab | 2.0 | sunny | 2 |
| 4 | 2023-08-16 11:16:55 | 36.3 | 0.1 | 1684.0 | 0.0 | 43.0 | 64.0 | 67.0 | 26.1 | 14.0 | 75.3 | 26.0 | middle | ac | lab | 2.0 | sunny | 2 |
In [ ]:
df['Occupancy_Classified'].value_counts().plot(kind = 'bar')
Out[ ]:
<Axes: xlabel='Occupancy_Classified'>
In [ ]:
# Find the columns which contain strings
for column in df.columns:
if pd.api.types.is_string_dtype(df[column]):
print(column)
Position Room Condition Room Type Weather
In [ ]:
{i: pd.api.types.is_string_dtype(df[i]) for i in df.columns}
Out[ ]:
{'Timestamp': False,
'CO (ppm)': False,
'NO2 (ppm)': False,
'CO2 (ppm)': False,
'TVOC (ppb)': False,
'PM1 (ug/m3)': False,
'PM2.5 (ug/m3)': False,
'PM10 (ug/m3)': False,
'Temperature (C)': False,
'Humidity (%)': False,
'Sound (dB)': False,
'Occupancy': False,
'Position': True,
'Room Condition': True,
'Room Type': True,
'Floor No.': False,
'Weather': True,
'Occupancy_Classified': False}
In [ ]:
for column in df.columns:
if pd.api.types.is_string_dtype(df[column]):
df[column] = df[column].astype('category').cat.as_ordered()
In [ ]:
# Check missing data ratio percentage
df.isna().sum() * 100.00 / len(df)
Out[ ]:
Timestamp 0.000000 CO (ppm) 24.962204 NO2 (ppm) 24.962204 CO2 (ppm) 3.855199 TVOC (ppb) 0.000000 PM1 (ug/m3) 0.000000 PM2.5 (ug/m3) 0.000000 PM10 (ug/m3) 0.000000 Temperature (C) 0.000000 Humidity (%) 0.000000 Sound (dB) 0.000000 Occupancy 0.000000 Position 0.000000 Room Condition 0.000000 Room Type 0.000000 Floor No. 0.000000 Weather 0.000000 Occupancy_Classified 0.000000 dtype: float64
In [ ]:
df.isna().sum()
Out[ ]:
Timestamp 0 CO (ppm) 2972 NO2 (ppm) 2972 CO2 (ppm) 459 TVOC (ppb) 0 PM1 (ug/m3) 0 PM2.5 (ug/m3) 0 PM10 (ug/m3) 0 Temperature (C) 0 Humidity (%) 0 Sound (dB) 0 Occupancy 0 Position 0 Room Condition 0 Room Type 0 Floor No. 0 Weather 0 Occupancy_Classified 0 dtype: int64
In [ ]:
for column in df.columns:
if pd.api.types.is_numeric_dtype(df[column]):
print(column)
CO (ppm) NO2 (ppm) CO2 (ppm) TVOC (ppb) PM1 (ug/m3) PM2.5 (ug/m3) PM10 (ug/m3) Temperature (C) Humidity (%) Sound (dB) Occupancy Floor No. Occupancy_Classified
In [ ]:
# Check for which numeric columns have null values
for column in df.columns:
if pd.api.types.is_numeric_dtype(df[column]) and df[column].isna().sum():
print(column)
CO (ppm) NO2 (ppm) CO2 (ppm)
In [ ]:
ax = df['Occupancy_Classified'].value_counts().plot(kind = 'bar')
for container in ax.containers:
ax.bar_label(container, fmt='%d', label_type='edge')
plt.xticks(rotation = 0);
In [ ]:
# Dropping all missing rows
new_df = df.dropna().reset_index(drop = True)
In [ ]:
new_df.shape
Out[ ]:
(8907, 18)
In [ ]:
ax = new_df['Occupancy_Classified'].value_counts().plot(kind = 'bar')
for container in ax.containers:
ax.bar_label(container, fmt='%d', label_type='edge')
plt.xticks(rotation = 0);
In [ ]:
# Check for columns which aren't numeric or is categorical
# and print category codes
l = 0
for column in new_df.columns:
if pd.api.types.is_categorical_dtype(new_df[column]):
l += 1
print(f'{column}: {dict(enumerate(df[column].cat.categories))}')
print(l)
Position: {0: 'backside', 1: 'frontside', 2: 'middle'}
Room Condition: {0: 'ac', 1: 'non ac'}
Room Type: {0: 'classroom', 1: 'lab'}
Weather: {0: 'cloudy', 1: 'overcast', 2: 'rainy', 3: 'sunny'}
4
In [ ]:
# Turn all categorical variables into numbers and fill missing
for column in new_df.columns:
if pd.api.types.is_categorical_dtype(new_df[column]):
# Turn categories into numbers and add +1
new_df[column] = pd.Categorical(new_df[column]).codes + 1
ML¶
Original ML Pipeline Module¶
In [ ]:
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
import warnings
import time
warnings.filterwarnings("ignore")
np.random.seed(42)
class MultiModelEvaluator:
def __init__(self, models):
self.models = models
self.model_names = list(models.keys())
self.X_train = None
self.X_test = None
self.y_train = None
self.y_test = None
self.metric_scores = {}
def split_data(self, X, y, test_size=0.2, random_state=42):
self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
def train_models(self):
for model_name, model in self.models.items():
print(f"\n================================================\n{model_name} model has started training")
start = time.time()
pipeline = Pipeline([
('scaler', StandardScaler()),
('classifier', model)
])
pipeline.fit(self.X_train, self.y_train)
self.models[model_name] = pipeline
print(f"{model_name} model has ended training. Time -> {round(time.time() - start, 2)}s. Accuracy - > {(pipeline.score(self.X_test, self.y_test) * 100.00):.2f} %\n================================================\n")
def evaluate_models(self, X_test, y_test):
for model_name, pipeline in self.models.items():
Train_y_pred = pipeline.predict(self.X_train)
Train_accuracy = accuracy_score(self.y_train, Train_y_pred)
Train_f1_macro = f1_score(self.y_train, Train_y_pred, average='macro')
Train_f1_weighted = f1_score(self.y_train, Train_y_pred, average='weighted')
Train_recall_macro = recall_score(self.y_train, Train_y_pred, average='macro')
Train_recall_weighted = recall_score(self.y_train, Train_y_pred, average='weighted')
Train_precision_macro = precision_score(self.y_train, Train_y_pred, average='macro')
Train_precision_weighted = precision_score(self.y_train, Train_y_pred, average='weighted')
Train_confusion = confusion_matrix(self.y_train, Train_y_pred)
Test_y_pred = pipeline.predict(X_test)
Test_accuracy = accuracy_score(y_test, Test_y_pred)
Test_f1_macro = f1_score(y_test, Test_y_pred, average='macro')
Test_f1_weighted = f1_score(y_test, Test_y_pred, average='weighted')
Test_recall_macro = recall_score(y_test, Test_y_pred, average='macro')
Test_recall_weighted = recall_score(y_test, Test_y_pred, average='weighted')
Test_precision_macro = precision_score(y_test, Test_y_pred, average='macro')
Test_precision_weighted = precision_score(y_test, Test_y_pred, average='weighted')
Test_confusion = confusion_matrix(y_test, Test_y_pred)
self.metric_scores[model_name] = {
'Train Accuracy': Train_accuracy,
'Train F1 Macro': Train_f1_macro,
'Train F1 Weighted': Train_f1_weighted,
'Train Recall Macro': Train_recall_macro,
'Train Recall Weighted': Train_recall_weighted,
'Train Precision Macro': Train_precision_macro,
'Train Precision Weighted': Train_precision_weighted,
'Train Confusion Matrix': Train_confusion,
'Test Accuracy': Test_accuracy,
'Test F1 Macro': Test_f1_macro,
'Test F1 Weighted': Test_f1_weighted,
'Test Recall Macro': Test_recall_macro,
'Test Recall Weighted': Test_recall_weighted,
'Test Precision Macro': Test_precision_macro,
'Test Precision Weighted': Test_precision_weighted,
'Test Confusion Matrix': Test_confusion
}
def get_metric_scores(self, model_name):
return self.metric_scores.get(model_name, {})
Hyper-Tuned Pipeline Module¶
In [ ]:
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
import warnings
import numpy as np
import time
warnings.filterwarnings("ignore")
np.random.seed(42)
class MultiModelEvaluatorWithTuning:
def __init__(self, models, param_grids, n_iter_values = {}, n_jobs_values = {}, verbose_values = {}):
self.models = models
self.model_names = list(models.keys())
self.param_grids = param_grids
self.X_train = None
self.X_test = None
self.y_train = None
self.y_test = None
self.n_iter_values = n_iter_values
self.n_jobs_values = n_jobs_values
self.verbose_values = verbose_values
self.metric_scores = {}
self.best_params = {}
def split_data(self, X, y, test_size=0.2, random_state=42):
self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
def train_models(self):
for model_name, model in self.models.items():
print(f"\n================================================\n{model_name} tuned model has started training")
start = time.time()
pipeline = Pipeline([
('scaler', StandardScaler()),
('classifier', model)
])
if model_name not in self.n_iter_values:
search = GridSearchCV(pipeline, self.param_grids[model_name], cv=5, n_jobs = self.n_jobs_values.get(model_name, -1), verbose = self.verbose_values.get(model_name, 1))
else:
search = RandomizedSearchCV(pipeline, self.param_grids[model_name], cv=5, n_jobs = self.n_jobs_values.get(model_name, -1), n_iter = self.n_iter_values.get(model_name, 10), verbose = self.verbose_values.get(model_name, 1))
search.fit(self.X_train, self.y_train)
best_model = search.best_estimator_
self.models[model_name] = best_model
self.best_params[model_name] = search.best_params_
print(f"{model_name} tuned model has ended training. Time -> {round(time.time() - start, 2)}s. Accuracy - > {(best_model.score(self.X_test, self.y_test) * 100.00):.2f} %\n================================================\n")
def evaluate_models(self, X_test, y_test):
for model_name, pipeline in self.models.items():
Train_y_pred = pipeline.predict(self.X_train)
Train_accuracy = accuracy_score(self.y_train, Train_y_pred)
Train_f1_macro = f1_score(self.y_train, Train_y_pred, average='macro')
Train_f1_weighted = f1_score(self.y_train, Train_y_pred, average='weighted')
Train_recall_macro = recall_score(self.y_train, Train_y_pred, average='macro')
Train_recall_weighted = recall_score(self.y_train, Train_y_pred, average='weighted')
Train_precision_macro = precision_score(self.y_train, Train_y_pred, average='macro')
Train_precision_weighted = precision_score(self.y_train, Train_y_pred, average='weighted')
Train_confusion = confusion_matrix(self.y_train, Train_y_pred)
Test_y_pred = pipeline.predict(X_test)
Test_accuracy = accuracy_score(y_test, Test_y_pred)
Test_f1_macro = f1_score(y_test, Test_y_pred, average='macro')
Test_f1_weighted = f1_score(y_test, Test_y_pred, average='weighted')
Test_recall_macro = recall_score(y_test, Test_y_pred, average='macro')
Test_recall_weighted = recall_score(y_test, Test_y_pred, average='weighted')
Test_precision_macro = precision_score(y_test, Test_y_pred, average='macro')
Test_precision_weighted = precision_score(y_test, Test_y_pred, average='weighted')
Test_confusion = confusion_matrix(y_test, Test_y_pred)
self.metric_scores[model_name] = {
'Train Accuracy': Train_accuracy,
'Train F1 Macro': Train_f1_macro,
'Train F1 Weighted': Train_f1_weighted,
'Train Recall Macro': Train_recall_macro,
'Train Recall Weighted': Train_recall_weighted,
'Train Precision Macro': Train_precision_macro,
'Train Precision Weighted': Train_precision_weighted,
'Train Confusion Matrix': Train_confusion,
'Test Accuracy': Test_accuracy,
'Test F1 Macro': Test_f1_macro,
'Test F1 Weighted': Test_f1_weighted,
'Test Recall Macro': Test_recall_macro,
'Test Recall Weighted': Test_recall_weighted,
'Test Precision Macro': Test_precision_macro,
'Test Precision Weighted': Test_precision_weighted,
'Test Confusion Matrix': Test_confusion
}
def get_metric_scores(self, model_name):
return self.metric_scores.get(model_name, {})
def get_best_params(self, model_name):
return self.best_params.get(model_name, {})
In [ ]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, f1_score
import matplotlib.pyplot as plt
class DeepModel(nn.Module):
def __init__(self, input_size, hidden_sizes, output_size):
super(DeepModel, self).__init__()
layers = []
in_features = input_size
# Input normalization
layers.append(nn.BatchNorm1d(input_size))
for hidden_size in hidden_sizes:
layers.append(nn.Linear(in_features, hidden_size))
# Batch normalization for hidden layers
layers.append(nn.BatchNorm1d(hidden_size))
layers.append(nn.ReLU())
in_features = hidden_size
layers.append(nn.Linear(hidden_sizes[-1], output_size))
self.layers = nn.Sequential(*layers)
def forward(self, x):
return self.layers(x)
def ann_model(X, y, test_size = 0.2):
# Evaluate the best model on the test set
def evaluate_accuracy(model, data_loader):
model.eval()
total_correct = 0
total_samples = 0
with torch.no_grad():
for inputs, labels in data_loader:
outputs = model(inputs)
_, predicted = torch.max(outputs, 1)
total_correct += (predicted == labels).sum().item()
total_samples += labels.size(0)
accuracy = total_correct / total_samples
return accuracy
# Convert to PyTorch tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)
# Convert data to PyTorch dataset
dataset = data.TensorDataset(X, y)
# Split the data into train, dev, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=test_size, random_state=42)
X_dev, X_test, y_dev, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
# Create PyTorch DataLoader for each set
train_loader = data.DataLoader(data.TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
dev_loader = data.DataLoader(data.TensorDataset(X_dev, y_dev), batch_size=32, shuffle=False)
test_loader = data.DataLoader(data.TensorDataset(X_test, y_test), batch_size=32, shuffle=False)
# Define hyperparameters to try
learning_rate = 0.01
num_epochs = 50
# Function to train and evaluate the model
def train_and_evaluate(model, learning_rate):
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
# Lists to store convergence data
train_losses = []
dev_losses = []
train_accuracies = []
dev_accuracies = []
# Training loop
for epoch in range(num_epochs):
model.train()
for inputs, labels in train_loader:
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
# Compute and store losses and accuracies
train_loss = loss.item()
train_losses.append(train_loss)
train_acc = evaluate_accuracy(model, train_loader)
train_accuracies.append(train_acc)
dev_acc = evaluate_accuracy(model, dev_loader)
dev_accuracies.append(dev_acc)
print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}, Dev Accuracy: {dev_acc:.4f}")
# Plot convergence graph
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(range(1, num_epochs + 1), train_losses, label='Train Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(range(1, num_epochs + 1), train_accuracies, label='Train Accuracy')
plt.plot(range(1, num_epochs + 1), dev_accuracies, label='Dev Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.tight_layout()
plt.show()
# Evaluation
model.eval()
total_correct = 0
total_samples = 0
predicted_labels = []
with torch.no_grad():
for inputs, labels in dev_loader:
outputs = model(inputs)
_, predicted = torch.max(outputs, 1)
total_correct += (predicted == labels).sum().item()
total_samples += labels.size(0)
predicted_labels.extend(predicted.tolist())
accuracy = total_correct / total_samples
# Classification report and F1-score
print("Classification Report:")
print(classification_report(y_dev, predicted_labels))
f1 = f1_score(y_dev, predicted_labels, average='weighted')
print(f"F1-Score: {f1:.4f}")
return accuracy
# Define different configurations for hidden layers and units
hidden_layers_configs = [
[16, 8], # 2 hidden layers with 16 and 8 units
[32, 16, 8], # 3 hidden layers with 32, 16, and 8 units
[64, 32, 16, 8] # 4 hidden layers with 64, 32, 16, and 8 units
]
# Train and evaluate models with different configurations
best_accuracy = 0.0
best_model = None
for hidden_layers in hidden_layers_configs:
model = DeepModel(input_size=X.shape[1], hidden_sizes=hidden_layers, output_size=y.shape[0])
accuracy = train_and_evaluate(model, learning_rate)
print(f"Hidden layers configuration: {hidden_layers}, Accuracy: {accuracy}")
if accuracy > best_accuracy:
best_accuracy = accuracy
best_model = model
print(f"Best model hidden layers configuration: {best_model.layers}, Best accuracy: {best_accuracy}")
test_accuracy = evaluate_accuracy(best_model, test_loader)
print(f"Test Accuracy: {test_accuracy}")
return best_model
In [ ]:
def original_ml_pipeline_obj(x, y, test_size = 0.2):
# Define the machine learning models
models = {
'LogisticRegression': LogisticRegression(solver = 'liblinear'),
'KNN': KNeighborsClassifier(),
'SVM': SVC(kernel = 'rbf', gamma = 0.1, C = 1.0),
'Linear SVM': SVC(kernel="linear", C=0.025, random_state=42),
'DecisionTree': DecisionTreeClassifier(random_state = 42),
'RandomForest': RandomForestClassifier(n_estimators = 1000, random_state = 42),
'XGB': XGBClassifier(use_label_encoder = False),
'LGBM': LGBMClassifier(n_estimators = 1000, random_state = 42, n_jobs = -1, verbosity = -1),
'GradientBoosting': GradientBoostingClassifier(),
'MLP Neural Net': MLPClassifier(alpha=1, max_iter=1000, random_state=42),
"AdaBoost": AdaBoostClassifier(random_state=42),
"Naive Bayes": GaussianNB(),
"QDA": QuadraticDiscriminantAnalysis(),
}
# Initialize the MultiModelEvaluator
evaluator = MultiModelEvaluator(models)
# Split the data into training and testing sets
evaluator.split_data(x, y, test_size = test_size)
# Train the models
evaluator.train_models()
# Evaluate the models
evaluator.evaluate_models(evaluator.X_test, evaluator.y_test)
# Get metric scores for a specific model
# model_name = 'RandomForest'
# scores = evaluator.get_metric_scores(model_name)
# print(f'Metric Scores for Model {model_name}:')
# for metric, score in scores.items():
# print(f'{metric}: {score}')
return evaluator
In [ ]:
def hyper_tuned_ml_pipeline_obj(x, y, test_size = 0.2):
# Define the machine learning models
models = {
'LogisticRegression': LogisticRegression(solver = 'liblinear'),
'KNN': KNeighborsClassifier(),
'SVM': SVC(kernel = 'rbf', gamma = 0.1, C = 1.0),
'Linear SVM': SVC(kernel="linear", C=0.025, random_state=42),
'DecisionTree': DecisionTreeClassifier(random_state = 42),
'RandomForest': RandomForestClassifier(n_estimators = 1000, random_state = 42),
'XGB': XGBClassifier(use_label_encoder = False),
'LGBM': LGBMClassifier(n_estimators = 1000, random_state = 42, n_jobs = -1, verbosity = -1),
'GradientBoosting': GradientBoostingClassifier(),
'MLP Neural Net': MLPClassifier(alpha=1, max_iter=1000, random_state=42),
"AdaBoost": AdaBoostClassifier(random_state=42),
"Naive Bayes": GaussianNB(),
"QDA": QuadraticDiscriminantAnalysis(),
}
n_jobs_values = {
}
verbose_values = {
}
n_iter_values = {
'RandomForest': 10,
'XGB': 150,
'LGBM': 150,
'MLP Neural Net': 50,
}
# Define parameter grids for hyperparameter tuning
param_grids = {
'RandomForest': {'classifier__n_estimators': np.arange(100, 3000, 100), 'classifier__max_features': ['auto', 'sqrt'], 'classifier__max_depth': [2, 3, 5, 10, 15, None], 'classifier__min_samples_split': [2, 5, 10], 'classifier__min_samples_leaf': [1, 2, 4]},
'DecisionTree': {"classifier__criterion":("gini", "entropy"), "classifier__splitter":("best", "random"), "classifier__max_depth":np.arange(1, 21), "classifier__min_samples_split":[2, 3, 4], "classifier__min_samples_leaf":np.arange(1, 21)},
'GradientBoosting': {'classifier__n_estimators': [50, 100, 200]},
'KNN': {'classifier__n_neighbors': np.arange(1, 21)},
'XGB': {'classifier__max_depth': np.arange(1, 21), 'classifier__learning_rate': np.arange(0, 1.1, 0.1)},
'SVM': {'classifier__C': [0.1, 0.5, 1, 2, 5, 10, 20], 'classifier__kernel': ['rbf'], "classifier__gamma": [0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1]},
'LogisticRegression': {'classifier__C': np.logspace(-4, 4, 20), "classifier__solver": ["liblinear"], 'classifier__penalty': ['l1', 'l2']},
'LGBM': {'classifier__learning_rate': np.logspace(np.log(0.01), np.log(1), num = 500, base=3), 'classifier__max_depth': np.arange(5, 15), 'classifier__n_estimators': np.arange(5, 35), 'classifier__num_leaves': np.arange(5, 50), 'classifier__boosting_type': ['gbdt', 'dart'], 'classifier__colsample_bytree': np.linspace(0.6, 1, 500),'classifier__reg_lambda': np.linspace(0, 1, 500)},
'AdaBoost' : {'classifier__n_estimators': [10, 50, 100, 500], 'classifier__learning_rate': [0.0001, 0.001, 0.01, 0.1, 1.0], 'classifier__random_state': [42]},
'Naive Bayes': {'classifier__var_smoothing': np.logspace(0,-9, num=100)},
'MLP Neural Net': {'classifier__hidden_layer_sizes': [(150,100,50), (120,80,40), (100,50,30)], 'classifier__max_iter': [50, 100, 150],
'classifier__activation': ['tanh', 'relu'], 'classifier__solver': ['lbfgs', 'adam'], 'classifier__alpha': [0.0001, 0.05],
'classifier__learning_rate': ['constant','adaptive'], 'classifier__random_state': [42]},
'QDA': {'classifier__reg_param': [0.1, 0.2, 0.3, 0.4, 0.5]},
'Linear SVM': {'classifier__C': [0.1, 1, 10], 'classifier__kernel': ['linear']}
}
# Initialize the MultiModelEvaluatorWithTuning
evaluator = MultiModelEvaluatorWithTuning(models, param_grids, n_iter_values = n_iter_values, n_jobs_values = n_jobs_values, verbose_values = verbose_values)
# Split the data into training and testing sets
evaluator.split_data(x, y, test_size = test_size)
# Train the models with hyperparameter tuning
evaluator.train_models()
# Evaluate the models
evaluator.evaluate_models(evaluator.X_test, evaluator.y_test)
# Get metric scores for a specific model
# model_name = 'RandomForest'
# scores = evaluator_tuned.get_metric_scores(model_name)
# print(f'Metric Scores for Model {model_name}:')
# for metric, score in scores.items():
# print(f'{metric}: {score}')
return evaluator
In [ ]:
def evaluate_result(evaluator):
result = {'Model Name': [], 'Train Accuracy': [], 'Train F1 Macro': [], 'Train F1 Weighted': [], 'Train Recall Macro': [],
'Train Recall Weighted': [], 'Train Precision Macro': [], 'Train Precision Weighted': [],
'Test Accuracy': [], 'Test F1 Macro': [], 'Test F1 Weighted': [], 'Test Recall Macro': [],
'Test Recall Weighted': [], 'Test Precision Macro': [], 'Test Precision Weighted': []
}
for model_name in evaluator.metric_scores:
result['Model Name'].append(model_name)
for metric_name in list(result.keys())[1:]:
result[metric_name].append(round(evaluator.metric_scores[model_name][metric_name] * 100.00, 2))
result = pd.DataFrame(result).T
result.rename(columns=result.iloc[0], inplace = True)
result.drop(result.index[0], inplace = True)
result.index = pd.MultiIndex.from_tuples(
[('Train', metric.replace('Train ', '')) if i < 7 else ('Test', metric.replace('Test ', '')) for i, metric in enumerate(result.index)],
names=['', 'Metrics'])
result.index.names = ['', 'Metrics']
return result
In [ ]:
def evaluate_ann(model, X_train, X_test, y_train, y_test):
model.eval()
with torch.no_grad():
Train_y_pred = torch.max(model(torch.tensor(X_train, dtype=torch.float32)), 1)[1].numpy()
Train_accuracy = accuracy_score(y_train, Train_y_pred)
Train_f1_macro = f1_score(y_train, Train_y_pred, average='macro')
Train_f1_weighted = f1_score(y_train, Train_y_pred, average='weighted')
Train_recall_macro = recall_score(y_train, Train_y_pred, average='macro')
Train_recall_weighted = recall_score(y_train, Train_y_pred, average='weighted')
Train_precision_macro = precision_score(y_train, Train_y_pred, average='macro')
Train_precision_weighted = precision_score(y_train, Train_y_pred, average='weighted')
Train_confusion = confusion_matrix(y_train, Train_y_pred)
model.eval()
with torch.no_grad():
Test_y_pred = torch.max(model(torch.tensor(X_test, dtype=torch.float32)), 1)[1].numpy()
Test_accuracy = accuracy_score(y_test, Test_y_pred)
Test_f1_macro = f1_score(y_test, Test_y_pred, average='macro')
Test_f1_weighted = f1_score(y_test, Test_y_pred, average='weighted')
Test_recall_macro = recall_score(y_test, Test_y_pred, average='macro')
Test_recall_weighted = recall_score(y_test, Test_y_pred, average='weighted')
Test_precision_macro = precision_score(y_test, Test_y_pred, average='macro')
Test_precision_weighted = precision_score(y_test, Test_y_pred, average='weighted')
Test_confusion = confusion_matrix(y_test, Test_y_pred)
metrics = {
'Train Accuracy': Train_accuracy,
'Train F1 Macro': Train_f1_macro,
'Train F1 Weighted': Train_f1_weighted,
'Train Recall Macro': Train_recall_macro,
'Train Recall Weighted': Train_recall_weighted,
'Train Precision Macro': Train_precision_macro,
'Train Precision Weighted': Train_precision_weighted,
'Train Confusion Matrix': Train_confusion,
'Test Accuracy': Test_accuracy,
'Test F1 Macro': Test_f1_macro,
'Test F1 Weighted': Test_f1_weighted,
'Test Recall Macro': Test_recall_macro,
'Test Recall Weighted': Test_recall_weighted,
'Test Precision Macro': Test_precision_macro,
'Test Precision Weighted': Test_precision_weighted,
'Test Confusion Matrix': Test_confusion
}
result = {'Model Name': [], 'Train Accuracy': [], 'Train F1 Macro': [], 'Train F1 Weighted': [], 'Train Recall Macro': [],
'Train Recall Weighted': [], 'Train Precision Macro': [], 'Train Precision Weighted': [],
'Test Accuracy': [], 'Test F1 Macro': [], 'Test F1 Weighted': [], 'Test Recall Macro': [],
'Test Recall Weighted': [], 'Test Precision Macro': [], 'Test Precision Weighted': []
}
result['Model Name'].append('ANN')
for metric_name in list(result.keys())[1:]:
result[metric_name].append(round(metrics[metric_name] * 100.00, 2))
result = pd.DataFrame(result).T
result.rename(columns=result.iloc[0], inplace = True)
result.drop(result.index[0], inplace = True)
result.index = pd.MultiIndex.from_tuples(
[('Train', metric.replace('Train ', '')) if i < 7 else ('Test', metric.replace('Test ', '')) for i, metric in enumerate(result.index)],
names=['', 'Metrics'])
result.index.names = ['', 'Metrics']
return metrics, result
In [ ]:
def plot_feature_importances(models, model_names, feature_names):
"""
Plot feature importances for a list of machine learning models.
Parameters:
- models (list): List of trained models.
- model_names (list): Names of the models for labeling in the plot.
- feature_names (list): Names of the features for labeling in the plot.
Returns:
- None
"""
num_models = len(models)
num_features = len(feature_names)
for i in range(num_models):
plt.figure(figsize=(10, 6))
if isinstance(models[i], DecisionTreeClassifier) or isinstance(models[i], RandomForestClassifier):
importances = models[i].feature_importances_
elif isinstance(models[i], XGBClassifier) or isinstance(models[i], AdaBoostClassifier):
importances = models[i].feature_importances_
elif isinstance(models[i], LGBMClassifier):
importances = models[i].feature_importances_
elif isinstance(models[i], LogisticRegression):
importances = np.abs(models[i].coef_[0])
elif isinstance(models[i], MLPClassifier):
importances = [np.sum(np.abs(layer), axis=1) for layer in [layer / np.linalg.norm(layer, ord=2, axis=0)
for layer in models[i].coefs_]][0]
# elif isinstance(models[i], KNeighborsClassifier) or isinstance(models[i], SVC):
# importances = [1] * num_features # KNN and SVM doesn't have feature importance, for instance let's say every feature importance is equal
elif isinstance(models[i], GradientBoostingClassifier):
importances = models[i].feature_importances_
else:
pass # KNN, SVM, Naive Bayes and QDA doesn't have feature importance
# raise ValueError(f"Unsupported model type: {type(models[i])}")
# Sort feature importances in descending order
sorted_indices = np.argsort(importances)[::-1]
sorted_importances = [importances[idx] for idx in sorted_indices] # Convert to a list of values
sorted_feature_names = [feature_names[idx] for idx in sorted_indices]
plt.bar(range(num_features), sorted_importances, tick_label=sorted_feature_names)
plt.title(f'Feature Importances for {model_names[i]}')
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()
In [ ]:
# for feature scaling
from sklearn.preprocessing import StandardScaler
st_x = StandardScaler()
- Position: {1: 'backside', 2: 'frontside', 3: 'middle'}
- Room Condition: {1: 'ac', 2: 'non ac'}
- Room Type: {1: 'classroom', 2: 'lab'}
- Weather: {1: 'cloudy', 2: 'overcast', 3: 'rainy', 4: 'sunny'}
- Lab has always ac
- Classroom has ac and non-ac
AC Lab ML Machine Learning Model¶
In [ ]:
# Preparing data for ML
df_tmp = new_df.query("`Room Type` == 2")[['CO2 (ppm)', 'PM1 (ug/m3)', 'PM2.5 (ug/m3)', 'PM10 (ug/m3)', 'Temperature (C)', 'Humidity (%)', 'Position', 'Room Condition', 'Room Type', 'Floor No.', 'Weather', 'Occupancy_Classified']].drop(["Room Condition", "Room Type"], axis = 1)
In [ ]:
# Split data into x and y
ac_lab_x = df_tmp.drop('Occupancy_Classified', axis = 1)
ac_lab_y = df_tmp['Occupancy_Classified'].values # converting to numpy array
# Scaling input variables, output variables doens't required as we are just predicting discrete outcomes
ac_lab_x = st_x.fit_transform(ac_lab_x)
In [ ]:
# @title Experimenting our dataset with Dimension Reduction Techniques (t-SNE)
!pip install openTSNE
from openTSNE import TSNE
n = 4
ac_lab_x = df_tmp.drop('Occupancy_Classified', axis = 1).to_numpy()
ac_lab_y = df_tmp['Occupancy_Classified'].values # converting to numpy array
model = TSNE(
n_components = n,
perplexity=25,
metric="euclidean",
n_jobs=-1,
random_state=42,
verbose=True,
)
tsne_data = model.fit(ac_lab_x)
tsne_data = np.vstack((tsne_data.T, ac_lab_y)).T
tsne_df = pd.DataFrame(data = tsne_data,
columns = [f'Dim_{i}' for i in range(1, n + 1)] + ['label'])
sns.pairplot(tsne_df, hue='label', palette="bright")
plt.show()
tsne_df.corr()['label']
# evaluator_ac_lab = original_ml_pipeline_obj(tsne_df.drop('label', axis = 1), tsne_df['label'], test_size = 0.4)
# result_ac_lab = evaluate_result(evaluator_ac_lab)
# result_ac_lab
# metric_to_show = 'Accuracy'
# ax = result_ac_lab.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
# plt.title(f'Train and Test {metric_to_show} for Different Models')
# plt.xlabel('Model')
# plt.ylabel(metric_to_show)
# plt.xticks(np.arange(len(result_ac_lab.columns.to_numpy())), result_ac_lab.columns.to_numpy(), rotation=90)
# plt.legend(loc='best')
# plt.grid(True)
# plt.show()
# ann_model_ac_lab = ann_model(tsne_data[:, :3], tsne_data[:, 3], test_size = 0.4)
# evaluate_ann(ann_model_ac_lab, *train_test_split(tsne_data[:, :3], tsne_data[:, 3], test_size=0.4, random_state=42))[1]
In [ ]:
evaluator_ac_lab = original_ml_pipeline_obj(ac_lab_x, ac_lab_y, test_size = 0.4)
================================================ LogisticRegression model has started training LogisticRegression model has ended training. Time -> 0.04s. Accuracy - > 76.65 % ================================================ ================================================ KNN model has started training KNN model has ended training. Time -> 0.01s. Accuracy - > 93.99 % ================================================ ================================================ SVM model has started training SVM model has ended training. Time -> 0.1s. Accuracy - > 83.67 % ================================================ ================================================ Linear SVM model has started training Linear SVM model has ended training. Time -> 0.11s. Accuracy - > 73.94 % ================================================ ================================================ DecisionTree model has started training DecisionTree model has ended training. Time -> 0.01s. Accuracy - > 97.29 % ================================================ ================================================ RandomForest model has started training RandomForest model has ended training. Time -> 3.26s. Accuracy - > 98.41 % ================================================ ================================================ XGB model has started training XGB model has ended training. Time -> 1.98s. Accuracy - > 98.17 % ================================================ ================================================ LGBM model has started training LGBM model has ended training. Time -> 4.11s. Accuracy - > 97.70 % ================================================ ================================================ GradientBoosting model has started training GradientBoosting model has ended training. Time -> 2.81s. Accuracy - > 97.88 % ================================================ ================================================ MLP Neural Net model has started training MLP Neural Net model has ended training. Time -> 2.69s. Accuracy - > 86.50 % ================================================ ================================================ AdaBoost model has started training AdaBoost model has ended training. Time -> 0.21s. Accuracy - > 43.51 % ================================================ ================================================ Naive Bayes model has started training Naive Bayes model has ended training. Time -> 0.0s. Accuracy - > 57.49 % ================================================ ================================================ QDA model has started training QDA model has ended training. Time -> 0.03s. Accuracy - > 34.91 % ================================================
In [ ]:
result_ac_lab = evaluate_result(evaluator_ac_lab)
result_ac_lab
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | ||||||||||||||
| Train | Accuracy | 75.18 | 97.29 | 83.32 | 73.25 | 100.0 | 100.0 | 99.92 | 99.84 | 100.0 | 86.31 | 42.25 | 56.45 | 33.4 |
| F1 Macro | 60.59 | 96.02 | 75.55 | 53.63 | 100.0 | 100.0 | 99.79 | 99.73 | 100.0 | 79.19 | 22.97 | 41.23 | 7.15 | |
| F1 Weighted | 72.13 | 97.28 | 82.42 | 69.2 | 100.0 | 100.0 | 99.92 | 99.84 | 100.0 | 85.64 | 31.42 | 50.4 | 16.72 | |
| Recall Macro | 60.89 | 95.45 | 72.76 | 56.39 | 100.0 | 100.0 | 99.79 | 99.88 | 100.0 | 77.41 | 33.63 | 50.38 | 14.29 | |
| Recall Weighted | 75.18 | 97.29 | 83.32 | 73.25 | 100.0 | 100.0 | 99.92 | 99.84 | 100.0 | 86.31 | 42.25 | 56.45 | 33.4 | |
| Precision Macro | 67.28 | 96.65 | 86.75 | 52.53 | 100.0 | 100.0 | 99.79 | 99.59 | 100.0 | 85.67 | 18.26 | 58.89 | 4.77 | |
| Precision Weighted | 71.66 | 97.29 | 84.91 | 66.94 | 100.0 | 100.0 | 99.92 | 99.84 | 100.0 | 87.01 | 26.59 | 64.75 | 11.15 | |
| Test | Accuracy | 76.65 | 93.99 | 83.67 | 73.94 | 97.29 | 98.41 | 98.17 | 97.7 | 97.88 | 86.5 | 43.51 | 57.49 | 34.91 |
| F1 Macro | 61.96 | 90.37 | 74.4 | 53.66 | 95.39 | 97.78 | 97.11 | 96.68 | 96.58 | 78.88 | 23.29 | 41.67 | 7.39 | |
| F1 Weighted | 73.78 | 93.96 | 82.78 | 70.18 | 97.29 | 98.41 | 98.17 | 97.7 | 97.87 | 85.87 | 33.21 | 52.43 | 18.06 | |
| Recall Macro | 61.53 | 89.84 | 71.37 | 55.85 | 95.3 | 97.68 | 96.87 | 96.45 | 96.0 | 77.28 | 33.61 | 51.34 | 14.29 | |
| Recall Weighted | 76.65 | 93.99 | 83.67 | 73.94 | 97.29 | 98.41 | 98.17 | 97.7 | 97.88 | 86.5 | 43.51 | 57.49 | 34.91 | |
| Precision Macro | 68.13 | 90.96 | 86.34 | 53.12 | 95.54 | 97.91 | 97.38 | 96.94 | 97.26 | 84.01 | 18.84 | 58.82 | 4.99 | |
| Precision Weighted | 72.93 | 93.96 | 85.18 | 68.18 | 97.32 | 98.42 | 98.19 | 97.72 | 97.89 | 86.93 | 28.76 | 66.36 | 12.18 |
In [ ]:
metric_to_show = 'Accuracy'
ax = result_ac_lab.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_ac_lab.columns.to_numpy())), result_ac_lab.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
evaluator_ac_lab_hyper_tuned = hyper_tuned_ml_pipeline_obj(ac_lab_x, ac_lab_y, test_size = 0.4)
================================================ LogisticRegression tuned model has started training Fitting 5 folds for each of 40 candidates, totalling 200 fits LogisticRegression tuned model has ended training. Time -> 109.5s. Accuracy - > 78.24 % ================================================ ================================================ KNN tuned model has started training Fitting 5 folds for each of 20 candidates, totalling 100 fits KNN tuned model has ended training. Time -> 5.01s. Accuracy - > 96.52 % ================================================ ================================================ SVM tuned model has started training Fitting 5 folds for each of 49 candidates, totalling 245 fits SVM tuned model has ended training. Time -> 28.6s. Accuracy - > 93.93 % ================================================ ================================================ Linear SVM tuned model has started training Fitting 5 folds for each of 3 candidates, totalling 15 fits Linear SVM tuned model has ended training. Time -> 2.17s. Accuracy - > 86.32 % ================================================ ================================================ DecisionTree tuned model has started training Fitting 5 folds for each of 4800 candidates, totalling 24000 fits DecisionTree tuned model has ended training. Time -> 138.99s. Accuracy - > 96.82 % ================================================ ================================================ RandomForest tuned model has started training Fitting 5 folds for each of 10 candidates, totalling 50 fits RandomForest tuned model has ended training. Time -> 197.37s. Accuracy - > 97.88 % ================================================ ================================================ XGB tuned model has started training Fitting 5 folds for each of 150 candidates, totalling 750 fits XGB tuned model has ended training. Time -> 185.72s. Accuracy - > 98.23 % ================================================ ================================================ LGBM tuned model has started training Fitting 5 folds for each of 150 candidates, totalling 750 fits LGBM tuned model has ended training. Time -> 172.55s. Accuracy - > 97.88 % ================================================ ================================================ GradientBoosting tuned model has started training Fitting 5 folds for each of 3 candidates, totalling 15 fits GradientBoosting tuned model has ended training. Time -> 46.77s. Accuracy - > 97.82 % ================================================ ================================================ MLP Neural Net tuned model has started training Fitting 5 folds for each of 50 candidates, totalling 250 fits MLP Neural Net tuned model has ended training. Time -> 786.21s. Accuracy - > 94.93 % ================================================ ================================================ AdaBoost tuned model has started training Fitting 5 folds for each of 20 candidates, totalling 100 fits AdaBoost tuned model has ended training. Time -> 59.32s. Accuracy - > 71.17 % ================================================ ================================================ Naive Bayes tuned model has started training Fitting 5 folds for each of 100 candidates, totalling 500 fits Naive Bayes tuned model has ended training. Time -> 2.17s. Accuracy - > 64.45 % ================================================ ================================================ QDA tuned model has started training Fitting 5 folds for each of 5 candidates, totalling 25 fits QDA tuned model has ended training. Time -> 0.21s. Accuracy - > 69.99 % ================================================
In [ ]:
result_ac_lab_hyper_tuned = evaluate_result(evaluator_ac_lab_hyper_tuned)
result_ac_lab_hyper_tuned
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | ||||||||||||||
| Train | Accuracy | 77.62 | 100.0 | 95.83 | 86.15 | 100.0 | 99.57 | 99.92 | 99.84 | 100.0 | 97.25 | 69.79 | 65.38 | 70.02 |
| F1 Macro | 70.58 | 100.0 | 94.37 | 80.13 | 100.0 | 99.4 | 99.79 | 99.73 | 100.0 | 95.69 | 53.34 | 60.57 | 59.54 | |
| F1 Weighted | 75.69 | 100.0 | 95.82 | 85.09 | 100.0 | 99.57 | 99.92 | 99.84 | 100.0 | 97.23 | 66.48 | 65.16 | 68.01 | |
| Recall Macro | 70.03 | 100.0 | 93.44 | 77.99 | 100.0 | 99.34 | 99.79 | 99.88 | 100.0 | 94.71 | 55.56 | 64.78 | 63.49 | |
| Recall Weighted | 77.62 | 100.0 | 95.83 | 86.15 | 100.0 | 99.57 | 99.92 | 99.84 | 100.0 | 97.25 | 69.79 | 65.38 | 70.02 | |
| Precision Macro | 75.52 | 100.0 | 95.51 | 88.78 | 100.0 | 99.47 | 99.79 | 99.59 | 100.0 | 96.87 | 58.95 | 65.15 | 59.2 | |
| Precision Weighted | 76.01 | 100.0 | 95.89 | 86.75 | 100.0 | 99.57 | 99.92 | 99.84 | 100.0 | 97.29 | 67.37 | 68.9 | 67.77 | |
| Test | Accuracy | 78.24 | 96.52 | 93.93 | 86.32 | 96.82 | 97.88 | 98.23 | 97.88 | 97.82 | 94.93 | 71.17 | 64.45 | 69.99 |
| F1 Macro | 69.87 | 95.01 | 91.52 | 80.41 | 94.21 | 97.41 | 97.22 | 97.05 | 96.53 | 91.72 | 54.42 | 58.22 | 58.62 | |
| F1 Weighted | 76.42 | 96.52 | 93.96 | 85.46 | 96.79 | 97.88 | 98.23 | 97.88 | 97.81 | 94.88 | 68.36 | 64.65 | 68.33 | |
| Recall Macro | 68.92 | 95.14 | 91.13 | 78.23 | 93.37 | 97.45 | 96.96 | 96.88 | 95.93 | 90.65 | 55.72 | 63.22 | 63.06 | |
| Recall Weighted | 78.24 | 96.52 | 93.93 | 86.32 | 96.82 | 97.88 | 98.23 | 97.88 | 97.82 | 94.93 | 71.17 | 64.45 | 69.99 | |
| Precision Macro | 75.92 | 94.89 | 92.09 | 88.26 | 95.27 | 97.41 | 97.51 | 97.24 | 97.24 | 92.97 | 63.21 | 62.25 | 59.02 | |
| Precision Weighted | 77.03 | 96.53 | 94.05 | 87.04 | 96.81 | 97.91 | 98.24 | 97.89 | 97.84 | 94.9 | 70.74 | 68.73 | 68.68 |
In [ ]:
metric_to_show = 'Accuracy'
result_ac_lab_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_ac_lab_hyper_tuned.columns.to_numpy())), result_ac_lab_hyper_tuned.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
ann_model_ac_lab = ann_model(ac_lab_x, ac_lab_y, test_size = 0.4)
Epoch 1/50, Train Loss: 7.5679, Train Accuracy: 0.3088, Dev Accuracy: 0.3090 Epoch 2/50, Train Loss: 6.4552, Train Accuracy: 0.3812, Dev Accuracy: 0.3620 Epoch 3/50, Train Loss: 5.2430, Train Accuracy: 0.4555, Dev Accuracy: 0.4528 Epoch 4/50, Train Loss: 3.7336, Train Accuracy: 0.4500, Dev Accuracy: 0.4281 Epoch 5/50, Train Loss: 3.4900, Train Accuracy: 0.4772, Dev Accuracy: 0.4505 Epoch 6/50, Train Loss: 3.0107, Train Accuracy: 0.4941, Dev Accuracy: 0.4682 Epoch 7/50, Train Loss: 1.8069, Train Accuracy: 0.6424, Dev Accuracy: 0.6533 Epoch 8/50, Train Loss: 1.0936, Train Accuracy: 0.6900, Dev Accuracy: 0.6969 Epoch 9/50, Train Loss: 0.9546, Train Accuracy: 0.7781, Dev Accuracy: 0.7689 Epoch 10/50, Train Loss: 2.1245, Train Accuracy: 0.7789, Dev Accuracy: 0.7748 Epoch 11/50, Train Loss: 1.7963, Train Accuracy: 0.8029, Dev Accuracy: 0.8031 Epoch 12/50, Train Loss: 0.5580, Train Accuracy: 0.7939, Dev Accuracy: 0.7830 Epoch 13/50, Train Loss: 0.5155, Train Accuracy: 0.7974, Dev Accuracy: 0.7913 Epoch 14/50, Train Loss: 0.9153, Train Accuracy: 0.8151, Dev Accuracy: 0.8160 Epoch 15/50, Train Loss: 0.9304, Train Accuracy: 0.8006, Dev Accuracy: 0.7877 Epoch 16/50, Train Loss: 0.2263, Train Accuracy: 0.8045, Dev Accuracy: 0.7983 Epoch 17/50, Train Loss: 0.5137, Train Accuracy: 0.8017, Dev Accuracy: 0.8007 Epoch 18/50, Train Loss: 0.9767, Train Accuracy: 0.8175, Dev Accuracy: 0.8160 Epoch 19/50, Train Loss: 0.6474, Train Accuracy: 0.8124, Dev Accuracy: 0.8125 Epoch 20/50, Train Loss: 0.6372, Train Accuracy: 0.8403, Dev Accuracy: 0.8384 Epoch 21/50, Train Loss: 0.5639, Train Accuracy: 0.8183, Dev Accuracy: 0.8231 Epoch 22/50, Train Loss: 0.4555, Train Accuracy: 0.8297, Dev Accuracy: 0.8208 Epoch 23/50, Train Loss: 0.6009, Train Accuracy: 0.8084, Dev Accuracy: 0.7983 Epoch 24/50, Train Loss: 1.1390, Train Accuracy: 0.8143, Dev Accuracy: 0.8160 Epoch 25/50, Train Loss: 0.5916, Train Accuracy: 0.8139, Dev Accuracy: 0.8184 Epoch 26/50, Train Loss: 0.7654, Train Accuracy: 0.8269, Dev Accuracy: 0.8172 Epoch 27/50, Train Loss: 1.2820, Train Accuracy: 0.8348, Dev Accuracy: 0.8184 Epoch 28/50, Train Loss: 0.5775, Train Accuracy: 0.8289, Dev Accuracy: 0.8219 Epoch 29/50, Train Loss: 1.0680, Train Accuracy: 0.8206, Dev Accuracy: 0.8101 Epoch 30/50, Train Loss: 0.7345, Train Accuracy: 0.8245, Dev Accuracy: 0.8149 Epoch 31/50, Train Loss: 0.4530, Train Accuracy: 0.8395, Dev Accuracy: 0.8208 Epoch 32/50, Train Loss: 0.3788, Train Accuracy: 0.8230, Dev Accuracy: 0.8290 Epoch 33/50, Train Loss: 0.8172, Train Accuracy: 0.8289, Dev Accuracy: 0.8267 Epoch 34/50, Train Loss: 0.5156, Train Accuracy: 0.8360, Dev Accuracy: 0.8219 Epoch 35/50, Train Loss: 0.4037, Train Accuracy: 0.8344, Dev Accuracy: 0.8243 Epoch 36/50, Train Loss: 0.8318, Train Accuracy: 0.8183, Dev Accuracy: 0.8219 Epoch 37/50, Train Loss: 0.4988, Train Accuracy: 0.8285, Dev Accuracy: 0.8396 Epoch 38/50, Train Loss: 0.4708, Train Accuracy: 0.8242, Dev Accuracy: 0.8172 Epoch 39/50, Train Loss: 0.9334, Train Accuracy: 0.8407, Dev Accuracy: 0.8337 Epoch 40/50, Train Loss: 0.7260, Train Accuracy: 0.8423, Dev Accuracy: 0.8337 Epoch 41/50, Train Loss: 0.3315, Train Accuracy: 0.8356, Dev Accuracy: 0.8325 Epoch 42/50, Train Loss: 0.7459, Train Accuracy: 0.8308, Dev Accuracy: 0.8243 Epoch 43/50, Train Loss: 0.4402, Train Accuracy: 0.8375, Dev Accuracy: 0.8420 Epoch 44/50, Train Loss: 0.9257, Train Accuracy: 0.8340, Dev Accuracy: 0.8314 Epoch 45/50, Train Loss: 0.7405, Train Accuracy: 0.8238, Dev Accuracy: 0.8325 Epoch 46/50, Train Loss: 0.6862, Train Accuracy: 0.8560, Dev Accuracy: 0.8573 Epoch 47/50, Train Loss: 1.3168, Train Accuracy: 0.8438, Dev Accuracy: 0.8255 Epoch 48/50, Train Loss: 0.2608, Train Accuracy: 0.8450, Dev Accuracy: 0.8373 Epoch 49/50, Train Loss: 0.5127, Train Accuracy: 0.8301, Dev Accuracy: 0.8290 Epoch 50/50, Train Loss: 1.0933, Train Accuracy: 0.8320, Dev Accuracy: 0.8196
Classification Report:
precision recall f1-score support
0 0.85 0.96 0.90 292
1 0.78 0.79 0.78 110
2 0.82 0.85 0.83 152
3 0.95 0.81 0.87 73
4 0.83 0.31 0.45 49
5 0.73 0.84 0.78 148
6 0.00 0.00 0.00 24
accuracy 0.82 848
macro avg 0.71 0.65 0.66 848
weighted avg 0.80 0.82 0.80 848
F1-Score: 0.8002
Hidden layers configuration: [16, 8], Accuracy: 0.8195754716981132
Epoch 1/50, Train Loss: 7.4726, Train Accuracy: 0.3006, Dev Accuracy: 0.2771
Epoch 2/50, Train Loss: 6.1961, Train Accuracy: 0.5268, Dev Accuracy: 0.5024
Epoch 3/50, Train Loss: 4.4576, Train Accuracy: 0.5732, Dev Accuracy: 0.5401
Epoch 4/50, Train Loss: 2.5779, Train Accuracy: 0.5814, Dev Accuracy: 0.5531
Epoch 5/50, Train Loss: 2.0403, Train Accuracy: 0.5779, Dev Accuracy: 0.5495
Epoch 6/50, Train Loss: 2.7736, Train Accuracy: 0.6003, Dev Accuracy: 0.5696
Epoch 7/50, Train Loss: 2.2470, Train Accuracy: 0.6920, Dev Accuracy: 0.6781
Epoch 8/50, Train Loss: 1.6335, Train Accuracy: 0.7766, Dev Accuracy: 0.7818
Epoch 9/50, Train Loss: 1.1709, Train Accuracy: 0.7832, Dev Accuracy: 0.7889
Epoch 10/50, Train Loss: 1.7481, Train Accuracy: 0.8222, Dev Accuracy: 0.8184
Epoch 11/50, Train Loss: 0.3060, Train Accuracy: 0.8124, Dev Accuracy: 0.8137
Epoch 12/50, Train Loss: 0.7900, Train Accuracy: 0.7978, Dev Accuracy: 0.7983
Epoch 13/50, Train Loss: 0.5404, Train Accuracy: 0.8041, Dev Accuracy: 0.8031
Epoch 14/50, Train Loss: 0.7373, Train Accuracy: 0.8419, Dev Accuracy: 0.8384
Epoch 15/50, Train Loss: 0.6097, Train Accuracy: 0.8116, Dev Accuracy: 0.8137
Epoch 16/50, Train Loss: 2.0310, Train Accuracy: 0.7608, Dev Accuracy: 0.7547
Epoch 17/50, Train Loss: 0.5048, Train Accuracy: 0.8576, Dev Accuracy: 0.8479
Epoch 18/50, Train Loss: 0.4645, Train Accuracy: 0.8596, Dev Accuracy: 0.8550
Epoch 19/50, Train Loss: 1.8805, Train Accuracy: 0.7415, Dev Accuracy: 0.7347
Epoch 20/50, Train Loss: 0.6586, Train Accuracy: 0.8360, Dev Accuracy: 0.8349
Epoch 21/50, Train Loss: 1.2055, Train Accuracy: 0.8308, Dev Accuracy: 0.8208
Epoch 22/50, Train Loss: 0.3086, Train Accuracy: 0.8670, Dev Accuracy: 0.8656
Epoch 23/50, Train Loss: 0.4957, Train Accuracy: 0.8702, Dev Accuracy: 0.8573
Epoch 24/50, Train Loss: 1.1060, Train Accuracy: 0.8320, Dev Accuracy: 0.8160
Epoch 25/50, Train Loss: 1.3113, Train Accuracy: 0.8308, Dev Accuracy: 0.8066
Epoch 26/50, Train Loss: 0.7811, Train Accuracy: 0.8505, Dev Accuracy: 0.8443
Epoch 27/50, Train Loss: 0.6761, Train Accuracy: 0.8757, Dev Accuracy: 0.8691
Epoch 28/50, Train Loss: 0.5585, Train Accuracy: 0.8544, Dev Accuracy: 0.8479
Epoch 29/50, Train Loss: 0.5659, Train Accuracy: 0.8820, Dev Accuracy: 0.8691
Epoch 30/50, Train Loss: 0.8611, Train Accuracy: 0.8293, Dev Accuracy: 0.8125
Epoch 31/50, Train Loss: 0.6248, Train Accuracy: 0.8509, Dev Accuracy: 0.8491
Epoch 32/50, Train Loss: 0.9830, Train Accuracy: 0.8438, Dev Accuracy: 0.8479
Epoch 33/50, Train Loss: 0.6630, Train Accuracy: 0.8525, Dev Accuracy: 0.8443
Epoch 34/50, Train Loss: 0.5949, Train Accuracy: 0.8501, Dev Accuracy: 0.8491
Epoch 35/50, Train Loss: 0.3742, Train Accuracy: 0.8800, Dev Accuracy: 0.8703
Epoch 36/50, Train Loss: 0.4591, Train Accuracy: 0.8619, Dev Accuracy: 0.8467
Epoch 37/50, Train Loss: 0.4808, Train Accuracy: 0.8285, Dev Accuracy: 0.8137
Epoch 38/50, Train Loss: 0.3475, Train Accuracy: 0.8584, Dev Accuracy: 0.8526
Epoch 39/50, Train Loss: 1.3753, Train Accuracy: 0.7891, Dev Accuracy: 0.7606
Epoch 40/50, Train Loss: 0.6399, Train Accuracy: 0.8560, Dev Accuracy: 0.8408
Epoch 41/50, Train Loss: 0.5183, Train Accuracy: 0.8560, Dev Accuracy: 0.8608
Epoch 42/50, Train Loss: 1.0500, Train Accuracy: 0.8639, Dev Accuracy: 0.8502
Epoch 43/50, Train Loss: 0.8047, Train Accuracy: 0.8682, Dev Accuracy: 0.8561
Epoch 44/50, Train Loss: 0.6796, Train Accuracy: 0.8513, Dev Accuracy: 0.8455
Epoch 45/50, Train Loss: 0.6205, Train Accuracy: 0.8706, Dev Accuracy: 0.8691
Epoch 46/50, Train Loss: 0.2211, Train Accuracy: 0.8851, Dev Accuracy: 0.8703
Epoch 47/50, Train Loss: 1.2657, Train Accuracy: 0.8662, Dev Accuracy: 0.8573
Epoch 48/50, Train Loss: 0.5114, Train Accuracy: 0.8954, Dev Accuracy: 0.8939
Epoch 49/50, Train Loss: 0.7127, Train Accuracy: 0.8682, Dev Accuracy: 0.8691
Epoch 50/50, Train Loss: 0.2078, Train Accuracy: 0.8659, Dev Accuracy: 0.8502
Classification Report:
precision recall f1-score support
0 0.98 0.91 0.95 292
1 0.74 0.81 0.77 110
2 0.80 0.84 0.82 152
3 0.96 0.89 0.92 73
4 0.83 0.39 0.53 49
5 0.74 0.99 0.85 148
6 1.00 0.25 0.40 24
accuracy 0.85 848
macro avg 0.86 0.73 0.75 848
weighted avg 0.86 0.85 0.84 848
F1-Score: 0.8424
Hidden layers configuration: [32, 16, 8], Accuracy: 0.8502358490566038
Epoch 1/50, Train Loss: 7.3063, Train Accuracy: 0.1459, Dev Accuracy: 0.1285
Epoch 2/50, Train Loss: 6.3056, Train Accuracy: 0.6743, Dev Accuracy: 0.6686
Epoch 3/50, Train Loss: 5.9427, Train Accuracy: 0.6998, Dev Accuracy: 0.6863
Epoch 4/50, Train Loss: 2.8393, Train Accuracy: 0.7065, Dev Accuracy: 0.7193
Epoch 5/50, Train Loss: 4.4799, Train Accuracy: 0.6448, Dev Accuracy: 0.6545
Epoch 6/50, Train Loss: 2.4085, Train Accuracy: 0.7557, Dev Accuracy: 0.7618
Epoch 7/50, Train Loss: 1.7595, Train Accuracy: 0.7958, Dev Accuracy: 0.8031
Epoch 8/50, Train Loss: 2.0559, Train Accuracy: 0.7911, Dev Accuracy: 0.7842
Epoch 9/50, Train Loss: 1.6194, Train Accuracy: 0.7876, Dev Accuracy: 0.7665
Epoch 10/50, Train Loss: 1.2184, Train Accuracy: 0.8438, Dev Accuracy: 0.8455
Epoch 11/50, Train Loss: 0.7580, Train Accuracy: 0.8249, Dev Accuracy: 0.8302
Epoch 12/50, Train Loss: 0.6332, Train Accuracy: 0.8131, Dev Accuracy: 0.7936
Epoch 13/50, Train Loss: 1.1190, Train Accuracy: 0.8281, Dev Accuracy: 0.8196
Epoch 14/50, Train Loss: 1.3169, Train Accuracy: 0.8509, Dev Accuracy: 0.8467
Epoch 15/50, Train Loss: 0.4554, Train Accuracy: 0.8415, Dev Accuracy: 0.8373
Epoch 16/50, Train Loss: 0.5193, Train Accuracy: 0.8552, Dev Accuracy: 0.8538
Epoch 17/50, Train Loss: 1.1831, Train Accuracy: 0.8316, Dev Accuracy: 0.8160
Epoch 18/50, Train Loss: 1.5159, Train Accuracy: 0.8509, Dev Accuracy: 0.8396
Epoch 19/50, Train Loss: 0.5843, Train Accuracy: 0.8635, Dev Accuracy: 0.8455
Epoch 20/50, Train Loss: 0.4796, Train Accuracy: 0.8682, Dev Accuracy: 0.8644
Epoch 21/50, Train Loss: 0.5932, Train Accuracy: 0.8371, Dev Accuracy: 0.8219
Epoch 22/50, Train Loss: 1.0192, Train Accuracy: 0.8619, Dev Accuracy: 0.8550
Epoch 23/50, Train Loss: 0.9314, Train Accuracy: 0.8501, Dev Accuracy: 0.8420
Epoch 24/50, Train Loss: 0.4399, Train Accuracy: 0.8564, Dev Accuracy: 0.8408
Epoch 25/50, Train Loss: 0.4603, Train Accuracy: 0.8887, Dev Accuracy: 0.8844
Epoch 26/50, Train Loss: 0.2975, Train Accuracy: 0.8615, Dev Accuracy: 0.8479
Epoch 27/50, Train Loss: 0.8075, Train Accuracy: 0.8588, Dev Accuracy: 0.8573
Epoch 28/50, Train Loss: 0.5097, Train Accuracy: 0.8529, Dev Accuracy: 0.8443
Epoch 29/50, Train Loss: 0.8670, Train Accuracy: 0.8792, Dev Accuracy: 0.8585
Epoch 30/50, Train Loss: 0.3063, Train Accuracy: 0.8643, Dev Accuracy: 0.8526
Epoch 31/50, Train Loss: 0.8306, Train Accuracy: 0.8816, Dev Accuracy: 0.8762
Epoch 32/50, Train Loss: 0.8839, Train Accuracy: 0.8611, Dev Accuracy: 0.8455
Epoch 33/50, Train Loss: 0.5090, Train Accuracy: 0.8647, Dev Accuracy: 0.8361
Epoch 34/50, Train Loss: 0.4229, Train Accuracy: 0.8721, Dev Accuracy: 0.8667
Epoch 35/50, Train Loss: 0.3537, Train Accuracy: 0.8769, Dev Accuracy: 0.8726
Epoch 36/50, Train Loss: 0.7091, Train Accuracy: 0.8839, Dev Accuracy: 0.8797
Epoch 37/50, Train Loss: 0.4488, Train Accuracy: 0.8670, Dev Accuracy: 0.8667
Epoch 38/50, Train Loss: 0.4762, Train Accuracy: 0.8729, Dev Accuracy: 0.8679
Epoch 39/50, Train Loss: 0.9091, Train Accuracy: 0.8725, Dev Accuracy: 0.8750
Epoch 40/50, Train Loss: 0.5778, Train Accuracy: 0.8792, Dev Accuracy: 0.8632
Epoch 41/50, Train Loss: 0.6472, Train Accuracy: 0.8682, Dev Accuracy: 0.8550
Epoch 42/50, Train Loss: 0.2584, Train Accuracy: 0.8698, Dev Accuracy: 0.8691
Epoch 43/50, Train Loss: 0.7754, Train Accuracy: 0.8792, Dev Accuracy: 0.8715
Epoch 44/50, Train Loss: 0.8772, Train Accuracy: 0.8702, Dev Accuracy: 0.8667
Epoch 45/50, Train Loss: 0.2729, Train Accuracy: 0.8686, Dev Accuracy: 0.8620
Epoch 46/50, Train Loss: 0.7849, Train Accuracy: 0.8887, Dev Accuracy: 0.8844
Epoch 47/50, Train Loss: 0.2453, Train Accuracy: 0.8741, Dev Accuracy: 0.8514
Epoch 48/50, Train Loss: 1.1743, Train Accuracy: 0.8607, Dev Accuracy: 0.8325
Epoch 49/50, Train Loss: 0.4111, Train Accuracy: 0.8584, Dev Accuracy: 0.8514
Epoch 50/50, Train Loss: 0.4506, Train Accuracy: 0.8765, Dev Accuracy: 0.8703
Classification Report:
precision recall f1-score support
0 0.95 0.95 0.95 292
1 0.73 0.85 0.79 110
2 0.90 0.80 0.84 152
3 0.96 0.89 0.92 73
4 0.65 0.53 0.58 49
5 0.85 0.91 0.88 148
6 0.74 0.83 0.78 24
accuracy 0.87 848
macro avg 0.83 0.82 0.82 848
weighted avg 0.87 0.87 0.87 848
F1-Score: 0.8697
Hidden layers configuration: [64, 32, 16, 8], Accuracy: 0.8702830188679245
Best model hidden layers configuration: Sequential(
(0): BatchNorm1d(9, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Linear(in_features=9, out_features=64, bias=True)
(2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ReLU()
(4): Linear(in_features=64, out_features=32, bias=True)
(5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(6): ReLU()
(7): Linear(in_features=32, out_features=16, bias=True)
(8): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(9): ReLU()
(10): Linear(in_features=16, out_features=8, bias=True)
(11): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(12): ReLU()
(13): Linear(in_features=8, out_features=4238, bias=True)
), Best accuracy: 0.8702830188679245
Test Accuracy: 0.8856132075471698
In [ ]:
evaluate_ann(ann_model_ac_lab, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]
Out[ ]:
| ANN | ||
|---|---|---|
| Metrics | ||
| Train | Accuracy | 88.12 |
| F1 Macro | 82.65 | |
| F1 Weighted | 87.59 | |
| Recall Macro | 80.43 | |
| Recall Weighted | 88.12 | |
| Precision Macro | 87.01 | |
| Precision Weighted | 88.5 | |
| Test | Accuracy | 87.38 |
| F1 Macro | 81.12 | |
| F1 Weighted | 86.96 | |
| Recall Macro | 78.81 | |
| Recall Weighted | 87.38 | |
| Precision Macro | 85.23 | |
| Precision Weighted | 87.7 |
In [ ]:
metric_to_show = 'Accuracy' # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted
fig, ax = plt.subplots(figsize=(10, 6))
# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
result_ac_lab_hyper_tuned.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')
# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
result_ac_lab.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(result_ac_lab.columns.to_numpy())), result_ac_lab.columns.to_numpy(), rotation=90)
plt.show();
In [ ]:
model_names, models = [], []
for model_name in evaluator_ac_lab_hyper_tuned.model_names:
model_names.append(model_name)
models.append(evaluator_ac_lab_hyper_tuned.models[model_name].named_steps['classifier'])
plot_feature_importances(models, model_names, df_tmp.columns.to_numpy()[:-1])
Objects ->
- evaluator_ac_lab
- evaluator_ac_lab_hyper_tuned
Results ->
- result_ac_lab
- result_ac_lab_hyper_tuned
AC Classroom ML Machine Learning Model¶
In [ ]:
# Preapring data for ML
df_tmp = new_df.query("`Room Type` == 1 and `Room Condition` == 1")[['CO2 (ppm)', 'PM1 (ug/m3)', 'PM2.5 (ug/m3)', 'PM10 (ug/m3)', 'Temperature (C)', 'Humidity (%)', 'Position', 'Room Condition', 'Room Type', 'Floor No.', 'Weather', 'Occupancy_Classified']].drop(["Room Condition", "Room Type"], axis = 1)
In [ ]:
# Split data into x and y
ac_classroom_x = df_tmp.drop('Occupancy_Classified', axis = 1)
ac_classroom_y = df_tmp['Occupancy_Classified'].values # converting to numpy array
# Scaling input variables, output variables doens't required as we are just predicting discrete outcomes
ac_classroom_x = st_x.fit_transform(ac_classroom_x)
In [ ]:
In [ ]:
# ML Training without hyper tuned
evaluator_ac_classroom = original_ml_pipeline_obj(ac_classroom_x, ac_classroom_y, test_size = 0.4)
================================================ LogisticRegression model has started training LogisticRegression model has ended training. Time -> 0.03s. Accuracy - > 73.10 % ================================================ ================================================ KNN model has started training KNN model has ended training. Time -> 0.0s. Accuracy - > 93.75 % ================================================ ================================================ SVM model has started training SVM model has ended training. Time -> 0.08s. Accuracy - > 83.35 % ================================================ ================================================ Linear SVM model has started training Linear SVM model has ended training. Time -> 0.11s. Accuracy - > 69.86 % ================================================ ================================================ DecisionTree model has started training DecisionTree model has ended training. Time -> 0.01s. Accuracy - > 97.66 % ================================================ ================================================ RandomForest model has started training RandomForest model has ended training. Time -> 4.18s. Accuracy - > 97.59 % ================================================ ================================================ XGB model has started training XGB model has ended training. Time -> 1.09s. Accuracy - > 97.51 % ================================================ ================================================ LGBM model has started training LGBM model has ended training. Time -> 4.4s. Accuracy - > 97.59 % ================================================ ================================================ GradientBoosting model has started training GradientBoosting model has ended training. Time -> 2.1s. Accuracy - > 97.81 % ================================================ ================================================ MLP Neural Net model has started training MLP Neural Net model has ended training. Time -> 2.65s. Accuracy - > 84.40 % ================================================ ================================================ AdaBoost model has started training AdaBoost model has ended training. Time -> 0.2s. Accuracy - > 50.26 % ================================================ ================================================ Naive Bayes model has started training Naive Bayes model has ended training. Time -> 0.0s. Accuracy - > 34.06 % ================================================ ================================================ QDA model has started training QDA model has ended training. Time -> 0.0s. Accuracy - > 2.19 % ================================================
In [ ]:
result_ac_classroom = evaluate_result(evaluator_ac_classroom)
result_ac_classroom
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | ||||||||||||||
| Train | Accuracy | 72.55 | 96.28 | 81.6 | 67.67 | 99.8 | 99.8 | 99.6 | 99.5 | 99.5 | 82.45 | 51.63 | 33.69 | 2.87 |
| F1 Macro | 64.14 | 95.59 | 72.96 | 48.36 | 99.73 | 99.73 | 99.6 | 99.43 | 99.27 | 72.35 | 39.43 | 31.88 | 0.93 | |
| F1 Weighted | 70.28 | 96.27 | 79.18 | 64.7 | 99.8 | 99.8 | 99.6 | 99.5 | 99.49 | 79.88 | 40.88 | 22.22 | 0.16 | |
| Recall Macro | 63.54 | 95.43 | 73.61 | 50.68 | 99.77 | 99.56 | 99.45 | 99.28 | 98.95 | 72.76 | 41.17 | 49.44 | 16.67 | |
| Recall Weighted | 72.55 | 96.28 | 81.6 | 67.67 | 99.8 | 99.8 | 99.6 | 99.5 | 99.5 | 82.45 | 51.63 | 33.69 | 2.87 | |
| Precision Macro | 66.48 | 95.76 | 88.99 | 62.64 | 99.69 | 99.9 | 99.75 | 99.58 | 99.61 | 72.5 | 53.73 | 45.86 | 0.48 | |
| Precision Weighted | 68.57 | 96.27 | 83.33 | 65.35 | 99.8 | 99.8 | 99.6 | 99.5 | 99.5 | 77.74 | 46.8 | 61.21 | 0.08 | |
| Test | Accuracy | 73.1 | 93.75 | 83.35 | 69.86 | 97.66 | 97.59 | 97.51 | 97.59 | 97.81 | 84.4 | 50.26 | 34.06 | 2.19 |
| F1 Macro | 63.72 | 91.0 | 73.73 | 50.12 | 95.8 | 95.58 | 96.18 | 96.32 | 96.37 | 73.03 | 38.82 | 31.03 | 0.71 | |
| F1 Weighted | 71.31 | 93.78 | 81.47 | 67.45 | 97.71 | 97.58 | 97.52 | 97.61 | 97.82 | 82.33 | 39.26 | 22.91 | 0.09 | |
| Recall Macro | 62.02 | 90.87 | 73.63 | 50.75 | 96.99 | 95.05 | 96.52 | 96.55 | 96.23 | 72.65 | 40.38 | 48.55 | 16.67 | |
| Recall Weighted | 73.1 | 93.75 | 83.35 | 69.86 | 97.66 | 97.59 | 97.51 | 97.59 | 97.81 | 84.4 | 50.26 | 34.06 | 2.19 | |
| Precision Macro | 67.42 | 91.22 | 90.05 | 64.89 | 94.84 | 96.13 | 95.88 | 96.13 | 96.52 | 73.93 | 54.71 | 46.04 | 0.36 | |
| Precision Weighted | 70.03 | 93.86 | 84.53 | 67.62 | 97.83 | 97.58 | 97.55 | 97.65 | 97.85 | 80.49 | 46.79 | 64.49 | 0.05 |
In [ ]:
metric_to_show = 'Accuracy'
ax = result_ac_classroom.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_ac_classroom.columns.to_numpy())), result_ac_classroom.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
evaluator_ac_classroom_hyper_tuned = hyper_tuned_ml_pipeline_obj(ac_classroom_x, ac_classroom_y, test_size = 0.4)
================================================ LogisticRegression tuned model has started training Fitting 5 folds for each of 40 candidates, totalling 200 fits LogisticRegression tuned model has ended training. Time -> 58.56s. Accuracy - > 75.96 % ================================================ ================================================ KNN tuned model has started training Fitting 5 folds for each of 20 candidates, totalling 100 fits KNN tuned model has ended training. Time -> 2.58s. Accuracy - > 94.72 % ================================================ ================================================ SVM tuned model has started training Fitting 5 folds for each of 49 candidates, totalling 245 fits SVM tuned model has ended training. Time -> 26.1s. Accuracy - > 94.80 % ================================================ ================================================ Linear SVM tuned model has started training Fitting 5 folds for each of 3 candidates, totalling 15 fits Linear SVM tuned model has ended training. Time -> 2.35s. Accuracy - > 79.50 % ================================================ ================================================ DecisionTree tuned model has started training Fitting 5 folds for each of 4800 candidates, totalling 24000 fits DecisionTree tuned model has ended training. Time -> 134.04s. Accuracy - > 97.06 % ================================================ ================================================ RandomForest tuned model has started training Fitting 5 folds for each of 10 candidates, totalling 50 fits RandomForest tuned model has ended training. Time -> 162.94s. Accuracy - > 97.59 % ================================================ ================================================ XGB tuned model has started training Fitting 5 folds for each of 150 candidates, totalling 750 fits XGB tuned model has ended training. Time -> 161.99s. Accuracy - > 97.59 % ================================================ ================================================ LGBM tuned model has started training Fitting 5 folds for each of 150 candidates, totalling 750 fits LGBM tuned model has ended training. Time -> 175.38s. Accuracy - > 97.51 % ================================================ ================================================ GradientBoosting tuned model has started training Fitting 5 folds for each of 3 candidates, totalling 15 fits GradientBoosting tuned model has ended training. Time -> 34.87s. Accuracy - > 97.51 % ================================================ ================================================ MLP Neural Net tuned model has started training Fitting 5 folds for each of 50 candidates, totalling 250 fits MLP Neural Net tuned model has ended training. Time -> 726.53s. Accuracy - > 96.01 % ================================================ ================================================ AdaBoost tuned model has started training Fitting 5 folds for each of 20 candidates, totalling 100 fits AdaBoost tuned model has ended training. Time -> 55.54s. Accuracy - > 56.52 % ================================================ ================================================ Naive Bayes tuned model has started training Fitting 5 folds for each of 100 candidates, totalling 500 fits Naive Bayes tuned model has ended training. Time -> 3.12s. Accuracy - > 57.27 % ================================================ ================================================ QDA tuned model has started training Fitting 5 folds for each of 5 candidates, totalling 25 fits QDA tuned model has ended training. Time -> 0.29s. Accuracy - > 76.79 % ================================================
In [ ]:
result_ac_classroom_hyper_tuned = evaluate_result(evaluator_ac_classroom_hyper_tuned)
result_ac_classroom_hyper_tuned
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | ||||||||||||||
| Train | Accuracy | 75.87 | 97.23 | 94.87 | 80.19 | 99.04 | 99.35 | 99.6 | 99.3 | 99.8 | 98.54 | 52.99 | 54.4 | 72.75 |
| F1 Macro | 72.98 | 96.43 | 92.7 | 78.53 | 98.47 | 99.09 | 99.6 | 99.1 | 99.73 | 98.15 | 39.83 | 52.32 | 71.13 | |
| F1 Weighted | 74.38 | 97.22 | 94.69 | 79.81 | 99.05 | 99.34 | 99.6 | 99.29 | 99.8 | 98.54 | 48.06 | 49.99 | 73.09 | |
| Recall Macro | 72.29 | 96.27 | 91.5 | 77.61 | 98.37 | 98.79 | 99.45 | 98.95 | 99.66 | 98.11 | 42.47 | 57.79 | 72.7 | |
| Recall Weighted | 75.87 | 97.23 | 94.87 | 80.19 | 99.04 | 99.35 | 99.6 | 99.3 | 99.8 | 98.54 | 52.99 | 54.4 | 72.75 | |
| Precision Macro | 84.69 | 96.61 | 94.64 | 80.14 | 98.57 | 99.4 | 99.75 | 99.25 | 99.8 | 98.19 | 46.55 | 56.94 | 72.67 | |
| Precision Weighted | 77.22 | 97.22 | 94.82 | 79.73 | 99.05 | 99.34 | 99.6 | 99.29 | 99.8 | 98.55 | 54.57 | 58.54 | 77.45 | |
| Test | Accuracy | 75.96 | 94.72 | 94.8 | 79.5 | 97.06 | 97.59 | 97.59 | 97.51 | 97.51 | 96.01 | 56.52 | 57.27 | 76.79 |
| F1 Macro | 73.3 | 91.94 | 92.45 | 75.84 | 95.02 | 96.06 | 96.26 | 96.07 | 95.86 | 94.25 | 42.46 | 53.0 | 74.12 | |
| F1 Weighted | 74.89 | 94.75 | 94.73 | 79.38 | 97.11 | 97.58 | 97.61 | 97.52 | 97.54 | 96.04 | 52.01 | 54.01 | 77.68 | |
| Recall Macro | 71.63 | 92.23 | 90.96 | 74.18 | 95.17 | 95.26 | 96.77 | 95.75 | 96.07 | 94.66 | 44.66 | 58.64 | 75.93 | |
| Recall Weighted | 75.96 | 94.72 | 94.8 | 79.5 | 97.06 | 97.59 | 97.59 | 97.51 | 97.51 | 96.01 | 56.52 | 57.27 | 76.79 | |
| Precision Macro | 84.57 | 91.74 | 94.26 | 77.79 | 95.06 | 96.91 | 95.8 | 96.41 | 95.7 | 93.92 | 50.62 | 57.86 | 75.49 | |
| Precision Weighted | 76.83 | 94.82 | 94.77 | 79.44 | 97.22 | 97.59 | 97.64 | 97.54 | 97.59 | 96.11 | 59.99 | 64.08 | 82.63 |
In [ ]:
metric_to_show = 'Accuracy'
result_ac_classroom_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_ac_classroom_hyper_tuned.columns.to_numpy())), result_ac_classroom_hyper_tuned.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
metric_to_show = 'Accuracy' # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted
fig, ax = plt.subplots(figsize=(10, 6))
# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
result_ac_classroom_hyper_tuned.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')
# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
result_ac_classroom.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(result_ac_classroom.columns.to_numpy())), result_ac_classroom.columns.to_numpy(), rotation=90)
plt.show();
In [ ]:
model_names, models = [], []
for model_name in evaluator_ac_classroom_hyper_tuned.model_names:
model_names.append(model_name)
models.append(evaluator_ac_classroom_hyper_tuned.models[model_name].named_steps['classifier'])
plot_feature_importances(models, model_names, df_tmp.columns.to_numpy()[:-1])
In [ ]:
ann_model_ac_classroom = ann_model(ac_classroom_x, ac_classroom_y, test_size = 0.4)
Epoch 1/50, Train Loss: 7.3231, Train Accuracy: 0.0000, Dev Accuracy: 0.0000 Epoch 2/50, Train Loss: 6.8273, Train Accuracy: 0.3927, Dev Accuracy: 0.3967 Epoch 3/50, Train Loss: 5.8184, Train Accuracy: 0.4414, Dev Accuracy: 0.4465 Epoch 4/50, Train Loss: 6.6608, Train Accuracy: 0.4520, Dev Accuracy: 0.4540 Epoch 5/50, Train Loss: 4.1816, Train Accuracy: 0.4706, Dev Accuracy: 0.4630 Epoch 6/50, Train Loss: 5.1708, Train Accuracy: 0.4741, Dev Accuracy: 0.4691 Epoch 7/50, Train Loss: 5.4508, Train Accuracy: 0.5767, Dev Accuracy: 0.5641 Epoch 8/50, Train Loss: 1.8186, Train Accuracy: 0.5918, Dev Accuracy: 0.5686 Epoch 9/50, Train Loss: 1.1433, Train Accuracy: 0.6687, Dev Accuracy: 0.6742 Epoch 10/50, Train Loss: 0.7894, Train Accuracy: 0.6973, Dev Accuracy: 0.6938 Epoch 11/50, Train Loss: 1.1286, Train Accuracy: 0.6516, Dev Accuracy: 0.6591 Epoch 12/50, Train Loss: 3.3833, Train Accuracy: 0.7220, Dev Accuracy: 0.7360 Epoch 13/50, Train Loss: 3.3087, Train Accuracy: 0.7134, Dev Accuracy: 0.7376 Epoch 14/50, Train Loss: 1.1667, Train Accuracy: 0.7074, Dev Accuracy: 0.7074 Epoch 15/50, Train Loss: 2.4644, Train Accuracy: 0.7667, Dev Accuracy: 0.7888 Epoch 16/50, Train Loss: 2.9475, Train Accuracy: 0.7728, Dev Accuracy: 0.7858 Epoch 17/50, Train Loss: 2.7459, Train Accuracy: 0.7345, Dev Accuracy: 0.7572 Epoch 18/50, Train Loss: 1.2099, Train Accuracy: 0.7944, Dev Accuracy: 0.8054 Epoch 19/50, Train Loss: 1.7360, Train Accuracy: 0.6541, Dev Accuracy: 0.6440 Epoch 20/50, Train Loss: 1.8859, Train Accuracy: 0.7858, Dev Accuracy: 0.8009 Epoch 21/50, Train Loss: 0.7349, Train Accuracy: 0.6556, Dev Accuracy: 0.6848 Epoch 22/50, Train Loss: 1.5846, Train Accuracy: 0.7702, Dev Accuracy: 0.7843 Epoch 23/50, Train Loss: 1.0822, Train Accuracy: 0.7944, Dev Accuracy: 0.8100 Epoch 24/50, Train Loss: 1.6786, Train Accuracy: 0.7144, Dev Accuracy: 0.7345 Epoch 25/50, Train Loss: 0.9919, Train Accuracy: 0.7567, Dev Accuracy: 0.7647 Epoch 26/50, Train Loss: 0.4078, Train Accuracy: 0.8064, Dev Accuracy: 0.8220 Epoch 27/50, Train Loss: 3.7707, Train Accuracy: 0.6385, Dev Accuracy: 0.6109 Epoch 28/50, Train Loss: 0.6523, Train Accuracy: 0.7919, Dev Accuracy: 0.8205 Epoch 29/50, Train Loss: 1.3765, Train Accuracy: 0.7717, Dev Accuracy: 0.7843 Epoch 30/50, Train Loss: 2.3608, Train Accuracy: 0.8074, Dev Accuracy: 0.8250 Epoch 31/50, Train Loss: 2.2627, Train Accuracy: 0.7898, Dev Accuracy: 0.8054 Epoch 32/50, Train Loss: 0.7710, Train Accuracy: 0.8175, Dev Accuracy: 0.8235 Epoch 33/50, Train Loss: 0.8699, Train Accuracy: 0.7883, Dev Accuracy: 0.8115 Epoch 34/50, Train Loss: 0.7134, Train Accuracy: 0.7969, Dev Accuracy: 0.8190 Epoch 35/50, Train Loss: 1.3125, Train Accuracy: 0.6787, Dev Accuracy: 0.6998 Epoch 36/50, Train Loss: 1.6866, Train Accuracy: 0.7823, Dev Accuracy: 0.7964 Epoch 37/50, Train Loss: 1.0597, Train Accuracy: 0.8195, Dev Accuracy: 0.8326 Epoch 38/50, Train Loss: 1.3165, Train Accuracy: 0.8110, Dev Accuracy: 0.8190 Epoch 39/50, Train Loss: 0.8309, Train Accuracy: 0.8039, Dev Accuracy: 0.8205 Epoch 40/50, Train Loss: 0.7335, Train Accuracy: 0.8009, Dev Accuracy: 0.8024 Epoch 41/50, Train Loss: 0.1936, Train Accuracy: 0.8074, Dev Accuracy: 0.8160 Epoch 42/50, Train Loss: 1.4569, Train Accuracy: 0.7964, Dev Accuracy: 0.8160 Epoch 43/50, Train Loss: 2.3033, Train Accuracy: 0.7712, Dev Accuracy: 0.7677 Epoch 44/50, Train Loss: 1.9026, Train Accuracy: 0.7144, Dev Accuracy: 0.7360 Epoch 45/50, Train Loss: 2.6893, Train Accuracy: 0.7506, Dev Accuracy: 0.7511 Epoch 46/50, Train Loss: 1.7714, Train Accuracy: 0.8084, Dev Accuracy: 0.8160 Epoch 47/50, Train Loss: 2.6553, Train Accuracy: 0.7778, Dev Accuracy: 0.7798 Epoch 48/50, Train Loss: 2.0022, Train Accuracy: 0.8220, Dev Accuracy: 0.8371 Epoch 49/50, Train Loss: 1.0888, Train Accuracy: 0.8160, Dev Accuracy: 0.8386 Epoch 50/50, Train Loss: 4.3663, Train Accuracy: 0.7949, Dev Accuracy: 0.8175
Classification Report:
precision recall f1-score support
0 0.92 0.92 0.92 13
1 1.00 0.88 0.93 80
2 0.33 0.03 0.06 32
3 0.66 0.94 0.78 149
4 0.87 0.85 0.86 265
5 0.87 0.75 0.81 124
accuracy 0.82 663
macro avg 0.78 0.73 0.73 663
weighted avg 0.82 0.82 0.80 663
F1-Score: 0.8036
Hidden layers configuration: [16, 8], Accuracy: 0.8174962292609351
Epoch 1/50, Train Loss: 7.5274, Train Accuracy: 0.1679, Dev Accuracy: 0.1312
Epoch 2/50, Train Loss: 7.5536, Train Accuracy: 0.3881, Dev Accuracy: 0.3635
Epoch 3/50, Train Loss: 6.8964, Train Accuracy: 0.6310, Dev Accuracy: 0.6591
Epoch 4/50, Train Loss: 5.9499, Train Accuracy: 0.6838, Dev Accuracy: 0.7044
Epoch 5/50, Train Loss: 5.3552, Train Accuracy: 0.6375, Dev Accuracy: 0.6621
Epoch 6/50, Train Loss: 3.2717, Train Accuracy: 0.6687, Dev Accuracy: 0.6998
Epoch 7/50, Train Loss: 3.5244, Train Accuracy: 0.6556, Dev Accuracy: 0.6802
Epoch 8/50, Train Loss: 3.8065, Train Accuracy: 0.7079, Dev Accuracy: 0.7315
Epoch 9/50, Train Loss: 3.8193, Train Accuracy: 0.7210, Dev Accuracy: 0.7406
Epoch 10/50, Train Loss: 1.2348, Train Accuracy: 0.7270, Dev Accuracy: 0.7617
Epoch 11/50, Train Loss: 1.3554, Train Accuracy: 0.7778, Dev Accuracy: 0.8069
Epoch 12/50, Train Loss: 0.9241, Train Accuracy: 0.7753, Dev Accuracy: 0.7888
Epoch 13/50, Train Loss: 2.0453, Train Accuracy: 0.6571, Dev Accuracy: 0.6471
Epoch 14/50, Train Loss: 2.5873, Train Accuracy: 0.7350, Dev Accuracy: 0.7647
Epoch 15/50, Train Loss: 2.6264, Train Accuracy: 0.6647, Dev Accuracy: 0.6546
Epoch 16/50, Train Loss: 2.7725, Train Accuracy: 0.6792, Dev Accuracy: 0.6998
Epoch 17/50, Train Loss: 1.4903, Train Accuracy: 0.7722, Dev Accuracy: 0.7934
Epoch 18/50, Train Loss: 0.9666, Train Accuracy: 0.6933, Dev Accuracy: 0.7149
Epoch 19/50, Train Loss: 2.0856, Train Accuracy: 0.7717, Dev Accuracy: 0.7949
Epoch 20/50, Train Loss: 1.2760, Train Accuracy: 0.7396, Dev Accuracy: 0.7662
Epoch 21/50, Train Loss: 0.4466, Train Accuracy: 0.7089, Dev Accuracy: 0.7376
Epoch 22/50, Train Loss: 2.6181, Train Accuracy: 0.5269, Dev Accuracy: 0.5309
Epoch 23/50, Train Loss: 2.8069, Train Accuracy: 0.7893, Dev Accuracy: 0.8175
Epoch 24/50, Train Loss: 2.5516, Train Accuracy: 0.7119, Dev Accuracy: 0.7149
Epoch 25/50, Train Loss: 1.6214, Train Accuracy: 0.6360, Dev Accuracy: 0.6531
Epoch 26/50, Train Loss: 1.3110, Train Accuracy: 0.7964, Dev Accuracy: 0.8205
Epoch 27/50, Train Loss: 0.3744, Train Accuracy: 0.7853, Dev Accuracy: 0.7994
Epoch 28/50, Train Loss: 1.0992, Train Accuracy: 0.7813, Dev Accuracy: 0.7707
Epoch 29/50, Train Loss: 1.7797, Train Accuracy: 0.6450, Dev Accuracy: 0.6787
Epoch 30/50, Train Loss: 1.3964, Train Accuracy: 0.7954, Dev Accuracy: 0.8281
Epoch 31/50, Train Loss: 1.7679, Train Accuracy: 0.7984, Dev Accuracy: 0.8039
Epoch 32/50, Train Loss: 1.1720, Train Accuracy: 0.7929, Dev Accuracy: 0.7873
Epoch 33/50, Train Loss: 1.6450, Train Accuracy: 0.7622, Dev Accuracy: 0.7421
Epoch 34/50, Train Loss: 1.5500, Train Accuracy: 0.7959, Dev Accuracy: 0.8145
Epoch 35/50, Train Loss: 0.7301, Train Accuracy: 0.7763, Dev Accuracy: 0.7858
Epoch 36/50, Train Loss: 2.4106, Train Accuracy: 0.7994, Dev Accuracy: 0.8100
Epoch 37/50, Train Loss: 2.5638, Train Accuracy: 0.7547, Dev Accuracy: 0.7315
Epoch 38/50, Train Loss: 1.1289, Train Accuracy: 0.7803, Dev Accuracy: 0.7813
Epoch 39/50, Train Loss: 1.0550, Train Accuracy: 0.8170, Dev Accuracy: 0.8190
Epoch 40/50, Train Loss: 0.2158, Train Accuracy: 0.8245, Dev Accuracy: 0.8416
Epoch 41/50, Train Loss: 3.4405, Train Accuracy: 0.6933, Dev Accuracy: 0.6968
Epoch 42/50, Train Loss: 2.3037, Train Accuracy: 0.8064, Dev Accuracy: 0.8235
Epoch 43/50, Train Loss: 1.4047, Train Accuracy: 0.7712, Dev Accuracy: 0.7888
Epoch 44/50, Train Loss: 1.5982, Train Accuracy: 0.7773, Dev Accuracy: 0.7919
Epoch 45/50, Train Loss: 1.3713, Train Accuracy: 0.8301, Dev Accuracy: 0.8477
Epoch 46/50, Train Loss: 4.0226, Train Accuracy: 0.7677, Dev Accuracy: 0.7828
Epoch 47/50, Train Loss: 4.0349, Train Accuracy: 0.8009, Dev Accuracy: 0.8069
Epoch 48/50, Train Loss: 1.4389, Train Accuracy: 0.8089, Dev Accuracy: 0.8205
Epoch 49/50, Train Loss: 1.2250, Train Accuracy: 0.7838, Dev Accuracy: 0.7934
Epoch 50/50, Train Loss: 0.1714, Train Accuracy: 0.8180, Dev Accuracy: 0.8235
Classification Report:
precision recall f1-score support
0 0.39 0.92 0.55 13
1 1.00 0.70 0.82 80
2 0.86 0.19 0.31 32
3 0.80 0.88 0.84 149
4 0.85 0.92 0.88 265
5 0.82 0.79 0.81 124
accuracy 0.82 663
macro avg 0.79 0.73 0.70 663
weighted avg 0.84 0.82 0.82 663
F1-Score: 0.8164
Hidden layers configuration: [32, 16, 8], Accuracy: 0.8235294117647058
Epoch 1/50, Train Loss: 7.3086, Train Accuracy: 0.2700, Dev Accuracy: 0.2986
Epoch 2/50, Train Loss: 6.5482, Train Accuracy: 0.5043, Dev Accuracy: 0.5204
Epoch 3/50, Train Loss: 5.2745, Train Accuracy: 0.5380, Dev Accuracy: 0.5596
Epoch 4/50, Train Loss: 4.0197, Train Accuracy: 0.5691, Dev Accuracy: 0.5928
Epoch 5/50, Train Loss: 4.3301, Train Accuracy: 0.5259, Dev Accuracy: 0.5324
Epoch 6/50, Train Loss: 4.5292, Train Accuracy: 0.6269, Dev Accuracy: 0.6380
Epoch 7/50, Train Loss: 0.8850, Train Accuracy: 0.7471, Dev Accuracy: 0.7692
Epoch 8/50, Train Loss: 1.2019, Train Accuracy: 0.7883, Dev Accuracy: 0.8009
Epoch 9/50, Train Loss: 0.5390, Train Accuracy: 0.7743, Dev Accuracy: 0.7934
Epoch 10/50, Train Loss: 0.8082, Train Accuracy: 0.7949, Dev Accuracy: 0.8145
Epoch 11/50, Train Loss: 1.4296, Train Accuracy: 0.6948, Dev Accuracy: 0.7270
Epoch 12/50, Train Loss: 1.2200, Train Accuracy: 0.7677, Dev Accuracy: 0.7722
Epoch 13/50, Train Loss: 0.6294, Train Accuracy: 0.7476, Dev Accuracy: 0.7662
Epoch 14/50, Train Loss: 1.5068, Train Accuracy: 0.7692, Dev Accuracy: 0.7843
Epoch 15/50, Train Loss: 2.7010, Train Accuracy: 0.6787, Dev Accuracy: 0.7044
Epoch 16/50, Train Loss: 1.1173, Train Accuracy: 0.7763, Dev Accuracy: 0.7994
Epoch 17/50, Train Loss: 0.6024, Train Accuracy: 0.7783, Dev Accuracy: 0.8039
Epoch 18/50, Train Loss: 1.0757, Train Accuracy: 0.5234, Dev Accuracy: 0.4962
Epoch 19/50, Train Loss: 1.7490, Train Accuracy: 0.6491, Dev Accuracy: 0.6757
Epoch 20/50, Train Loss: 1.1336, Train Accuracy: 0.6350, Dev Accuracy: 0.6440
Epoch 21/50, Train Loss: 2.2274, Train Accuracy: 0.7717, Dev Accuracy: 0.7888
Epoch 22/50, Train Loss: 2.3650, Train Accuracy: 0.8276, Dev Accuracy: 0.8296
Epoch 23/50, Train Loss: 1.1242, Train Accuracy: 0.7763, Dev Accuracy: 0.7753
Epoch 24/50, Train Loss: 0.3762, Train Accuracy: 0.8100, Dev Accuracy: 0.8235
Epoch 25/50, Train Loss: 4.2830, Train Accuracy: 0.7768, Dev Accuracy: 0.7828
Epoch 26/50, Train Loss: 0.2204, Train Accuracy: 0.8311, Dev Accuracy: 0.8431
Epoch 27/50, Train Loss: 0.8347, Train Accuracy: 0.6817, Dev Accuracy: 0.6953
Epoch 28/50, Train Loss: 0.9250, Train Accuracy: 0.7587, Dev Accuracy: 0.7632
Epoch 29/50, Train Loss: 3.1317, Train Accuracy: 0.8215, Dev Accuracy: 0.8265
Epoch 30/50, Train Loss: 1.2892, Train Accuracy: 0.5847, Dev Accuracy: 0.5822
Epoch 31/50, Train Loss: 2.4246, Train Accuracy: 0.7863, Dev Accuracy: 0.7798
Epoch 32/50, Train Loss: 1.1728, Train Accuracy: 0.7547, Dev Accuracy: 0.7738
Epoch 33/50, Train Loss: 0.6297, Train Accuracy: 0.7999, Dev Accuracy: 0.8250
Epoch 34/50, Train Loss: 1.7944, Train Accuracy: 0.8205, Dev Accuracy: 0.8250
Epoch 35/50, Train Loss: 1.4668, Train Accuracy: 0.7280, Dev Accuracy: 0.7451
Epoch 36/50, Train Loss: 2.5585, Train Accuracy: 0.7577, Dev Accuracy: 0.7738
Epoch 37/50, Train Loss: 1.4770, Train Accuracy: 0.7577, Dev Accuracy: 0.7707
Epoch 38/50, Train Loss: 1.3612, Train Accuracy: 0.8205, Dev Accuracy: 0.8416
Epoch 39/50, Train Loss: 1.1249, Train Accuracy: 0.8200, Dev Accuracy: 0.8401
Epoch 40/50, Train Loss: 1.3318, Train Accuracy: 0.8135, Dev Accuracy: 0.8326
Epoch 41/50, Train Loss: 0.6945, Train Accuracy: 0.8019, Dev Accuracy: 0.7964
Epoch 42/50, Train Loss: 0.9786, Train Accuracy: 0.8617, Dev Accuracy: 0.8627
Epoch 43/50, Train Loss: 2.1722, Train Accuracy: 0.8135, Dev Accuracy: 0.8356
Epoch 44/50, Train Loss: 0.8308, Train Accuracy: 0.7773, Dev Accuracy: 0.8115
Epoch 45/50, Train Loss: 1.3235, Train Accuracy: 0.7421, Dev Accuracy: 0.7647
Epoch 46/50, Train Loss: 1.4209, Train Accuracy: 0.8351, Dev Accuracy: 0.8311
Epoch 47/50, Train Loss: 1.0057, Train Accuracy: 0.8100, Dev Accuracy: 0.8100
Epoch 48/50, Train Loss: 1.8800, Train Accuracy: 0.7853, Dev Accuracy: 0.7617
Epoch 49/50, Train Loss: 1.0815, Train Accuracy: 0.8079, Dev Accuracy: 0.8235
Epoch 50/50, Train Loss: 3.3087, Train Accuracy: 0.7788, Dev Accuracy: 0.7919
Classification Report:
precision recall f1-score support
0 0.23 0.92 0.37 13
1 1.00 0.44 0.61 80
2 0.58 0.44 0.50 32
3 0.77 0.91 0.83 149
4 0.91 0.83 0.87 265
5 0.81 0.88 0.84 124
accuracy 0.79 663
macro avg 0.72 0.74 0.67 663
weighted avg 0.84 0.79 0.80 663
F1-Score: 0.7964
Hidden layers configuration: [64, 32, 16, 8], Accuracy: 0.7918552036199095
Best model hidden layers configuration: Sequential(
(0): BatchNorm1d(9, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Linear(in_features=9, out_features=32, bias=True)
(2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ReLU()
(4): Linear(in_features=32, out_features=16, bias=True)
(5): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(6): ReLU()
(7): Linear(in_features=16, out_features=8, bias=True)
(8): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(9): ReLU()
(10): Linear(in_features=8, out_features=3316, bias=True)
), Best accuracy: 0.8235294117647058
Test Accuracy: 0.8418674698795181
In [ ]:
evaluate_ann(ann_model_ac_classroom, *train_test_split(ac_classroom_x, ac_classroom_y, test_size=0.4, random_state=42))[1]
Out[ ]:
| ANN | ||
|---|---|---|
| Metrics | ||
| Train | Accuracy | 81.8 |
| F1 Macro | 72.38 | |
| F1 Weighted | 80.58 | |
| Recall Macro | 74.18 | |
| Recall Weighted | 81.8 | |
| Precision Macro | 78.85 | |
| Precision Weighted | 82.45 | |
| Test | Accuracy | 83.27 |
| F1 Macro | 71.13 | |
| F1 Weighted | 82.46 | |
| Recall Macro | 73.79 | |
| Recall Weighted | 83.27 | |
| Precision Macro | 79.1 | |
| Precision Weighted | 84.73 |
Objects ->
- evaluator_ac_classroom
- evaluator_ac_classroom_hyper_tuned
Results ->
- result_ac_classroom
- result_ac_classroom_hyper_tuned
Non-AC Classroom ML Machine Learning Model¶
In [ ]:
# Preapring data for ML
df_tmp = new_df.query("`Room Type` == 1 and `Room Condition` == 2")[['CO2 (ppm)', 'PM1 (ug/m3)', 'PM2.5 (ug/m3)', 'PM10 (ug/m3)', 'Temperature (C)', 'Humidity (%)', 'Position', 'Room Condition', 'Room Type', 'Floor No.', 'Weather', 'Occupancy_Classified']].drop(["Room Condition", "Room Type"], axis = 1)
In [ ]:
# Split data into x and y
non_ac_classroom_x = df_tmp.drop('Occupancy_Classified', axis = 1)
non_ac_classroom_y = df_tmp['Occupancy_Classified'].values # converting to numpy array
# Scaling input variables, output variables doens't required as we are just predicting discrete outcomes
non_ac_classroom_x = st_x.fit_transform(non_ac_classroom_x)
In [ ]:
# ML Training without hyper tuned
evaluator_non_ac_classroom = original_ml_pipeline_obj(non_ac_classroom_x, non_ac_classroom_y, test_size = 0.4)
================================================ LogisticRegression model has started training LogisticRegression model has ended training. Time -> 0.02s. Accuracy - > 76.20 % ================================================ ================================================ KNN model has started training KNN model has ended training. Time -> 0.0s. Accuracy - > 96.31 % ================================================ ================================================ SVM model has started training SVM model has ended training. Time -> 0.02s. Accuracy - > 79.34 % ================================================ ================================================ Linear SVM model has started training Linear SVM model has ended training. Time -> 0.03s. Accuracy - > 71.59 % ================================================ ================================================ DecisionTree model has started training DecisionTree model has ended training. Time -> 0.0s. Accuracy - > 96.13 % ================================================ ================================================ RandomForest model has started training RandomForest model has ended training. Time -> 3.07s. Accuracy - > 97.23 % ================================================ ================================================ XGB model has started training XGB model has ended training. Time -> 0.97s. Accuracy - > 97.60 % ================================================ ================================================ LGBM model has started training LGBM model has ended training. Time -> 2.57s. Accuracy - > 97.60 % ================================================ ================================================ GradientBoosting model has started training GradientBoosting model has ended training. Time -> 1.13s. Accuracy - > 96.31 % ================================================ ================================================ MLP Neural Net model has started training MLP Neural Net model has ended training. Time -> 1.15s. Accuracy - > 75.83 % ================================================ ================================================ AdaBoost model has started training AdaBoost model has ended training. Time -> 0.15s. Accuracy - > 68.27 % ================================================ ================================================ Naive Bayes model has started training Naive Bayes model has ended training. Time -> 0.0s. Accuracy - > 82.29 % ================================================ ================================================ QDA model has started training QDA model has ended training. Time -> 0.0s. Accuracy - > 71.03 % ================================================
In [ ]:
result_non_ac_classroom = evaluate_result(evaluator_non_ac_classroom)
result_non_ac_classroom
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | ||||||||||||||
| Train | Accuracy | 77.07 | 97.53 | 82.12 | 71.89 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 77.07 | 69.54 | 81.13 | 72.38 |
| F1 Macro | 55.99 | 96.07 | 68.99 | 41.59 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 53.1 | 63.58 | 75.12 | 56.02 | |
| F1 Weighted | 72.66 | 97.54 | 80.4 | 64.24 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 71.31 | 70.84 | 82.33 | 67.14 | |
| Recall Macro | 58.56 | 96.28 | 71.11 | 47.77 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 58.03 | 63.04 | 78.52 | 58.6 | |
| Recall Weighted | 77.07 | 97.53 | 82.12 | 71.89 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 77.07 | 69.54 | 81.13 | 72.38 | |
| Precision Macro | 60.52 | 95.87 | 72.52 | 54.42 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 49.25 | 71.41 | 78.15 | 65.99 | |
| Precision Weighted | 72.39 | 97.55 | 81.89 | 68.01 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 66.75 | 78.37 | 88.87 | 74.67 | |
| Test | Accuracy | 76.2 | 96.31 | 79.34 | 71.59 | 96.13 | 97.23 | 97.6 | 97.6 | 96.31 | 75.83 | 68.27 | 82.29 | 71.03 |
| F1 Macro | 55.9 | 94.21 | 66.44 | 42.59 | 93.8 | 95.39 | 96.32 | 96.12 | 94.49 | 53.3 | 61.31 | 77.98 | 57.81 | |
| F1 Weighted | 70.65 | 96.23 | 77.14 | 62.6 | 96.12 | 97.22 | 97.61 | 97.6 | 96.31 | 69.19 | 68.99 | 83.54 | 66.13 | |
| Recall Macro | 59.84 | 93.87 | 68.1 | 48.84 | 93.85 | 95.13 | 96.08 | 95.78 | 94.53 | 59.38 | 59.77 | 80.44 | 60.95 | |
| Recall Weighted | 76.2 | 96.31 | 79.34 | 71.59 | 96.13 | 97.23 | 97.6 | 97.6 | 96.31 | 75.83 | 68.27 | 82.29 | 71.03 | |
| Precision Macro | 61.06 | 94.76 | 69.4 | 54.63 | 93.78 | 95.66 | 96.6 | 96.48 | 94.48 | 48.88 | 68.43 | 80.5 | 65.16 | |
| Precision Weighted | 71.09 | 96.29 | 77.85 | 64.59 | 96.12 | 97.21 | 97.63 | 97.61 | 96.32 | 64.09 | 73.85 | 89.17 | 73.0 |
In [ ]:
metric_to_show = 'Accuracy'
ax = result_non_ac_classroom.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_non_ac_classroom.columns.to_numpy())), result_non_ac_classroom.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
evaluator_non_ac_classroom_hyper_tuned = hyper_tuned_ml_pipeline_obj(non_ac_classroom_x, non_ac_classroom_y, test_size = 0.4)
================================================ LogisticRegression tuned model has started training Fitting 5 folds for each of 40 candidates, totalling 200 fits LogisticRegression tuned model has ended training. Time -> 19.26s. Accuracy - > 80.81 % ================================================ ================================================ KNN tuned model has started training Fitting 5 folds for each of 20 candidates, totalling 100 fits KNN tuned model has ended training. Time -> 1.33s. Accuracy - > 95.20 % ================================================ ================================================ SVM tuned model has started training Fitting 5 folds for each of 49 candidates, totalling 245 fits SVM tuned model has ended training. Time -> 3.81s. Accuracy - > 95.57 % ================================================ ================================================ Linear SVM tuned model has started training Fitting 5 folds for each of 3 candidates, totalling 15 fits Linear SVM tuned model has ended training. Time -> 0.25s. Accuracy - > 83.39 % ================================================ ================================================ DecisionTree tuned model has started training Fitting 5 folds for each of 4800 candidates, totalling 24000 fits DecisionTree tuned model has ended training. Time -> 103.19s. Accuracy - > 96.13 % ================================================ ================================================ RandomForest tuned model has started training Fitting 5 folds for each of 10 candidates, totalling 50 fits RandomForest tuned model has ended training. Time -> 171.82s. Accuracy - > 97.42 % ================================================ ================================================ XGB tuned model has started training Fitting 5 folds for each of 150 candidates, totalling 750 fits XGB tuned model has ended training. Time -> 85.58s. Accuracy - > 97.23 % ================================================ ================================================ LGBM tuned model has started training Fitting 5 folds for each of 150 candidates, totalling 750 fits LGBM tuned model has ended training. Time -> 93.3s. Accuracy - > 97.60 % ================================================ ================================================ GradientBoosting tuned model has started training Fitting 5 folds for each of 3 candidates, totalling 15 fits GradientBoosting tuned model has ended training. Time -> 17.3s. Accuracy - > 96.49 % ================================================ ================================================ MLP Neural Net tuned model has started training Fitting 5 folds for each of 50 candidates, totalling 250 fits MLP Neural Net tuned model has ended training. Time -> 311.3s. Accuracy - > 96.86 % ================================================ ================================================ AdaBoost tuned model has started training Fitting 5 folds for each of 20 candidates, totalling 100 fits AdaBoost tuned model has ended training. Time -> 41.46s. Accuracy - > 64.76 % ================================================ ================================================ Naive Bayes tuned model has started training Fitting 5 folds for each of 100 candidates, totalling 500 fits Naive Bayes tuned model has ended training. Time -> 1.65s. Accuracy - > 84.50 % ================================================ ================================================ QDA tuned model has started training Fitting 5 folds for each of 5 candidates, totalling 25 fits QDA tuned model has ended training. Time -> 0.14s. Accuracy - > 73.80 % ================================================
In [ ]:
result_non_ac_classroom_hyper_tuned = evaluate_result(evaluator_non_ac_classroom_hyper_tuned)
result_non_ac_classroom_hyper_tuned
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | ||||||||||||||
| Train | Accuracy | 83.85 | 100.0 | 97.78 | 84.96 | 99.51 | 100.0 | 100.0 | 100.0 | 100.0 | 99.51 | 67.45 | 83.48 | 75.59 |
| F1 Macro | 71.88 | 100.0 | 95.99 | 71.27 | 99.13 | 100.0 | 100.0 | 100.0 | 100.0 | 98.89 | 55.33 | 77.79 | 59.82 | |
| F1 Weighted | 82.38 | 100.0 | 97.78 | 82.6 | 99.51 | 100.0 | 100.0 | 100.0 | 100.0 | 99.51 | 66.42 | 84.76 | 73.27 | |
| Recall Macro | 70.01 | 100.0 | 95.95 | 71.08 | 99.01 | 100.0 | 100.0 | 100.0 | 100.0 | 98.95 | 52.83 | 81.19 | 65.3 | |
| Recall Weighted | 83.85 | 100.0 | 97.78 | 84.96 | 99.51 | 100.0 | 100.0 | 100.0 | 100.0 | 99.51 | 67.45 | 83.48 | 75.59 | |
| Precision Macro | 78.5 | 100.0 | 96.05 | 87.0 | 99.28 | 100.0 | 100.0 | 100.0 | 100.0 | 98.87 | 63.59 | 79.05 | 61.57 | |
| Precision Weighted | 83.23 | 100.0 | 97.79 | 86.97 | 99.52 | 100.0 | 100.0 | 100.0 | 100.0 | 99.52 | 71.07 | 89.74 | 77.17 | |
| Test | Accuracy | 80.81 | 95.2 | 95.57 | 83.39 | 96.13 | 97.42 | 97.23 | 97.6 | 96.49 | 96.86 | 64.76 | 84.5 | 73.8 |
| F1 Macro | 68.97 | 92.3 | 92.94 | 69.71 | 93.97 | 95.75 | 95.57 | 95.98 | 94.72 | 94.64 | 54.23 | 79.72 | 59.46 | |
| F1 Weighted | 79.11 | 95.16 | 95.51 | 80.63 | 96.11 | 97.41 | 97.23 | 97.58 | 96.49 | 96.86 | 63.45 | 85.75 | 71.23 | |
| Recall Macro | 69.02 | 92.08 | 92.32 | 70.58 | 93.72 | 95.45 | 95.45 | 95.69 | 94.62 | 94.36 | 52.03 | 82.21 | 64.66 | |
| Recall Weighted | 80.81 | 95.2 | 95.57 | 83.39 | 96.13 | 97.42 | 97.23 | 97.6 | 96.49 | 96.86 | 64.76 | 84.5 | 73.8 | |
| Precision Macro | 74.75 | 92.54 | 93.69 | 84.74 | 94.48 | 96.07 | 95.71 | 96.31 | 94.82 | 95.05 | 62.36 | 80.62 | 60.39 | |
| Precision Weighted | 80.73 | 95.13 | 95.53 | 86.05 | 96.23 | 97.41 | 97.23 | 97.58 | 96.48 | 96.91 | 68.05 | 89.78 | 74.4 |
In [ ]:
ann_model_non_ac_classroom = ann_model(non_ac_classroom_x, non_ac_classroom_y, test_size = 0.4)
Epoch 1/50, Train Loss: 6.5778, Train Accuracy: 0.0000, Dev Accuracy: 0.0000 Epoch 2/50, Train Loss: 6.1484, Train Accuracy: 0.3921, Dev Accuracy: 0.3579 Epoch 3/50, Train Loss: 5.8990, Train Accuracy: 0.6893, Dev Accuracy: 0.6679 Epoch 4/50, Train Loss: 5.5161, Train Accuracy: 0.7213, Dev Accuracy: 0.7085 Epoch 5/50, Train Loss: 4.8806, Train Accuracy: 0.7275, Dev Accuracy: 0.7159 Epoch 6/50, Train Loss: 4.1998, Train Accuracy: 0.7263, Dev Accuracy: 0.7122 Epoch 7/50, Train Loss: 3.8079, Train Accuracy: 0.7263, Dev Accuracy: 0.7122 Epoch 8/50, Train Loss: 4.3991, Train Accuracy: 0.7263, Dev Accuracy: 0.7122 Epoch 9/50, Train Loss: 3.6251, Train Accuracy: 0.7263, Dev Accuracy: 0.7085 Epoch 10/50, Train Loss: 2.0269, Train Accuracy: 0.7448, Dev Accuracy: 0.7306 Epoch 11/50, Train Loss: 2.9066, Train Accuracy: 0.7275, Dev Accuracy: 0.7122 Epoch 12/50, Train Loss: 2.8730, Train Accuracy: 0.7386, Dev Accuracy: 0.7196 Epoch 13/50, Train Loss: 2.9959, Train Accuracy: 0.7287, Dev Accuracy: 0.7122 Epoch 14/50, Train Loss: 2.4475, Train Accuracy: 0.7423, Dev Accuracy: 0.7306 Epoch 15/50, Train Loss: 1.6522, Train Accuracy: 0.7300, Dev Accuracy: 0.7159 Epoch 16/50, Train Loss: 1.3678, Train Accuracy: 0.7337, Dev Accuracy: 0.7159 Epoch 17/50, Train Loss: 0.6128, Train Accuracy: 0.7411, Dev Accuracy: 0.7232 Epoch 18/50, Train Loss: 1.2833, Train Accuracy: 0.7374, Dev Accuracy: 0.7159 Epoch 19/50, Train Loss: 2.5468, Train Accuracy: 0.7435, Dev Accuracy: 0.7232 Epoch 20/50, Train Loss: 1.5095, Train Accuracy: 0.7596, Dev Accuracy: 0.7454 Epoch 21/50, Train Loss: 0.6802, Train Accuracy: 0.7756, Dev Accuracy: 0.7528 Epoch 22/50, Train Loss: 0.9016, Train Accuracy: 0.7485, Dev Accuracy: 0.7232 Epoch 23/50, Train Loss: 0.7238, Train Accuracy: 0.7608, Dev Accuracy: 0.7343 Epoch 24/50, Train Loss: 1.0489, Train Accuracy: 0.7657, Dev Accuracy: 0.7343 Epoch 25/50, Train Loss: 0.5933, Train Accuracy: 0.7990, Dev Accuracy: 0.7491 Epoch 26/50, Train Loss: 0.5710, Train Accuracy: 0.7818, Dev Accuracy: 0.7491 Epoch 27/50, Train Loss: 1.0082, Train Accuracy: 0.7805, Dev Accuracy: 0.7565 Epoch 28/50, Train Loss: 1.1951, Train Accuracy: 0.7830, Dev Accuracy: 0.7417 Epoch 29/50, Train Loss: 0.5490, Train Accuracy: 0.8052, Dev Accuracy: 0.7528 Epoch 30/50, Train Loss: 0.9078, Train Accuracy: 0.8175, Dev Accuracy: 0.7638 Epoch 31/50, Train Loss: 0.5229, Train Accuracy: 0.7361, Dev Accuracy: 0.7232 Epoch 32/50, Train Loss: 1.5286, Train Accuracy: 0.8311, Dev Accuracy: 0.7675 Epoch 33/50, Train Loss: 0.9227, Train Accuracy: 0.7719, Dev Accuracy: 0.7601 Epoch 34/50, Train Loss: 0.6001, Train Accuracy: 0.8064, Dev Accuracy: 0.7712 Epoch 35/50, Train Loss: 0.5580, Train Accuracy: 0.8631, Dev Accuracy: 0.8118 Epoch 36/50, Train Loss: 1.3681, Train Accuracy: 0.8570, Dev Accuracy: 0.8081 Epoch 37/50, Train Loss: 1.0145, Train Accuracy: 0.7768, Dev Accuracy: 0.7528 Epoch 38/50, Train Loss: 0.6287, Train Accuracy: 0.7448, Dev Accuracy: 0.7232 Epoch 39/50, Train Loss: 1.4899, Train Accuracy: 0.8742, Dev Accuracy: 0.8524 Epoch 40/50, Train Loss: 0.7135, Train Accuracy: 0.8779, Dev Accuracy: 0.8708 Epoch 41/50, Train Loss: 0.7044, Train Accuracy: 0.8742, Dev Accuracy: 0.8598 Epoch 42/50, Train Loss: 0.5358, Train Accuracy: 0.8755, Dev Accuracy: 0.8376 Epoch 43/50, Train Loss: 0.3612, Train Accuracy: 0.8767, Dev Accuracy: 0.8413 Epoch 44/50, Train Loss: 0.2973, Train Accuracy: 0.8829, Dev Accuracy: 0.8598 Epoch 45/50, Train Loss: 0.6728, Train Accuracy: 0.7842, Dev Accuracy: 0.7565 Epoch 46/50, Train Loss: 0.1552, Train Accuracy: 0.8792, Dev Accuracy: 0.8561 Epoch 47/50, Train Loss: 0.4570, Train Accuracy: 0.8816, Dev Accuracy: 0.8524 Epoch 48/50, Train Loss: 0.9370, Train Accuracy: 0.8940, Dev Accuracy: 0.8708 Epoch 49/50, Train Loss: 1.0368, Train Accuracy: 0.8002, Dev Accuracy: 0.8044 Epoch 50/50, Train Loss: 0.5546, Train Accuracy: 0.8890, Dev Accuracy: 0.8598
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 60
1 0.94 0.97 0.95 60
2 0.44 0.67 0.53 18
3 0.86 0.83 0.84 23
4 0.50 0.48 0.49 27
5 0.96 0.86 0.90 83
accuracy 0.86 271
macro avg 0.78 0.80 0.79 271
weighted avg 0.88 0.86 0.86 271
F1-Score: 0.8649
Hidden layers configuration: [16, 8], Accuracy: 0.8597785977859779
Epoch 1/50, Train Loss: 7.0561, Train Accuracy: 0.0000, Dev Accuracy: 0.0000
Epoch 2/50, Train Loss: 6.9467, Train Accuracy: 0.0000, Dev Accuracy: 0.0000
Epoch 3/50, Train Loss: 6.6444, Train Accuracy: 0.1985, Dev Accuracy: 0.1808
Epoch 4/50, Train Loss: 6.0677, Train Accuracy: 0.4797, Dev Accuracy: 0.4207
Epoch 5/50, Train Loss: 6.7883, Train Accuracy: 0.4834, Dev Accuracy: 0.4207
Epoch 6/50, Train Loss: 5.4200, Train Accuracy: 0.4821, Dev Accuracy: 0.4207
Epoch 7/50, Train Loss: 4.9502, Train Accuracy: 0.5018, Dev Accuracy: 0.4502
Epoch 8/50, Train Loss: 4.1433, Train Accuracy: 0.5968, Dev Accuracy: 0.6125
Epoch 9/50, Train Loss: 3.8792, Train Accuracy: 0.6276, Dev Accuracy: 0.6273
Epoch 10/50, Train Loss: 3.7625, Train Accuracy: 0.6326, Dev Accuracy: 0.6384
Epoch 11/50, Train Loss: 4.1851, Train Accuracy: 0.5857, Dev Accuracy: 0.5535
Epoch 12/50, Train Loss: 4.0939, Train Accuracy: 0.6178, Dev Accuracy: 0.5646
Epoch 13/50, Train Loss: 2.5764, Train Accuracy: 0.5857, Dev Accuracy: 0.5424
Epoch 14/50, Train Loss: 2.6895, Train Accuracy: 0.6350, Dev Accuracy: 0.5978
Epoch 15/50, Train Loss: 1.8739, Train Accuracy: 0.6387, Dev Accuracy: 0.6125
Epoch 16/50, Train Loss: 1.8339, Train Accuracy: 0.6979, Dev Accuracy: 0.6974
Epoch 17/50, Train Loss: 4.0362, Train Accuracy: 0.7250, Dev Accuracy: 0.6900
Epoch 18/50, Train Loss: 1.2412, Train Accuracy: 0.6806, Dev Accuracy: 0.6863
Epoch 19/50, Train Loss: 3.9366, Train Accuracy: 0.7263, Dev Accuracy: 0.7196
Epoch 20/50, Train Loss: 2.1340, Train Accuracy: 0.7448, Dev Accuracy: 0.7269
Epoch 21/50, Train Loss: 3.1835, Train Accuracy: 0.7485, Dev Accuracy: 0.7380
Epoch 22/50, Train Loss: 1.7466, Train Accuracy: 0.7509, Dev Accuracy: 0.7380
Epoch 23/50, Train Loss: 2.1988, Train Accuracy: 0.7497, Dev Accuracy: 0.7380
Epoch 24/50, Train Loss: 2.4345, Train Accuracy: 0.7386, Dev Accuracy: 0.7196
Epoch 25/50, Train Loss: 2.1015, Train Accuracy: 0.7349, Dev Accuracy: 0.7343
Epoch 26/50, Train Loss: 2.8902, Train Accuracy: 0.7287, Dev Accuracy: 0.7196
Epoch 27/50, Train Loss: 1.8860, Train Accuracy: 0.7485, Dev Accuracy: 0.7380
Epoch 28/50, Train Loss: 2.3005, Train Accuracy: 0.7472, Dev Accuracy: 0.7380
Epoch 29/50, Train Loss: 1.5570, Train Accuracy: 0.7337, Dev Accuracy: 0.7159
Epoch 30/50, Train Loss: 0.7955, Train Accuracy: 0.7472, Dev Accuracy: 0.7343
Epoch 31/50, Train Loss: 1.2514, Train Accuracy: 0.7719, Dev Accuracy: 0.7565
Epoch 32/50, Train Loss: 1.1841, Train Accuracy: 0.7571, Dev Accuracy: 0.7343
Epoch 33/50, Train Loss: 2.9043, Train Accuracy: 0.7657, Dev Accuracy: 0.7601
Epoch 34/50, Train Loss: 1.8747, Train Accuracy: 0.7337, Dev Accuracy: 0.7159
Epoch 35/50, Train Loss: 1.5866, Train Accuracy: 0.7965, Dev Accuracy: 0.7675
Epoch 36/50, Train Loss: 1.4221, Train Accuracy: 0.8101, Dev Accuracy: 0.7749
Epoch 37/50, Train Loss: 1.2736, Train Accuracy: 0.8187, Dev Accuracy: 0.7786
Epoch 38/50, Train Loss: 0.6414, Train Accuracy: 0.7855, Dev Accuracy: 0.7565
Epoch 39/50, Train Loss: 1.2493, Train Accuracy: 0.8150, Dev Accuracy: 0.7749
Epoch 40/50, Train Loss: 0.1796, Train Accuracy: 0.8274, Dev Accuracy: 0.7823
Epoch 41/50, Train Loss: 0.4022, Train Accuracy: 0.8261, Dev Accuracy: 0.7860
Epoch 42/50, Train Loss: 0.7608, Train Accuracy: 0.8348, Dev Accuracy: 0.7897
Epoch 43/50, Train Loss: 1.3726, Train Accuracy: 0.8483, Dev Accuracy: 0.8229
Epoch 44/50, Train Loss: 1.5783, Train Accuracy: 0.8323, Dev Accuracy: 0.8155
Epoch 45/50, Train Loss: 0.9034, Train Accuracy: 0.7904, Dev Accuracy: 0.7638
Epoch 46/50, Train Loss: 0.8862, Train Accuracy: 0.8767, Dev Accuracy: 0.8487
Epoch 47/50, Train Loss: 0.3232, Train Accuracy: 0.8792, Dev Accuracy: 0.8487
Epoch 48/50, Train Loss: 0.6400, Train Accuracy: 0.8335, Dev Accuracy: 0.8192
Epoch 49/50, Train Loss: 0.9830, Train Accuracy: 0.6301, Dev Accuracy: 0.6273
Epoch 50/50, Train Loss: 0.4923, Train Accuracy: 0.8730, Dev Accuracy: 0.8598
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 60
1 0.84 0.97 0.90 60
2 0.48 0.78 0.60 18
3 0.95 0.83 0.88 23
4 0.45 0.33 0.38 27
5 1.00 0.88 0.94 83
accuracy 0.86 271
macro avg 0.79 0.80 0.78 271
weighted avg 0.87 0.86 0.86 271
F1-Score: 0.8599
Hidden layers configuration: [32, 16, 8], Accuracy: 0.8597785977859779
Epoch 1/50, Train Loss: 6.4835, Train Accuracy: 0.1899, Dev Accuracy: 0.1993
Epoch 2/50, Train Loss: 6.2137, Train Accuracy: 0.5598, Dev Accuracy: 0.4908
Epoch 3/50, Train Loss: 5.5238, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 4/50, Train Loss: 5.3057, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 5/50, Train Loss: 4.2484, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 6/50, Train Loss: 3.9475, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 7/50, Train Loss: 3.7466, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 8/50, Train Loss: 2.8665, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 9/50, Train Loss: 2.8141, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 10/50, Train Loss: 2.0393, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 11/50, Train Loss: 3.1371, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 12/50, Train Loss: 1.5216, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 13/50, Train Loss: 3.3945, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 14/50, Train Loss: 3.0591, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 15/50, Train Loss: 2.5538, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 16/50, Train Loss: 2.7765, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 17/50, Train Loss: 2.2761, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 18/50, Train Loss: 2.4770, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 19/50, Train Loss: 1.5217, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 20/50, Train Loss: 2.8558, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 21/50, Train Loss: 1.4355, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 22/50, Train Loss: 2.9788, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 23/50, Train Loss: 2.6036, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 24/50, Train Loss: 1.5833, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 25/50, Train Loss: 1.2470, Train Accuracy: 0.6079, Dev Accuracy: 0.5609
Epoch 26/50, Train Loss: 1.8446, Train Accuracy: 0.7238, Dev Accuracy: 0.7122
Epoch 27/50, Train Loss: 1.0970, Train Accuracy: 0.8200, Dev Accuracy: 0.7786
Epoch 28/50, Train Loss: 1.5065, Train Accuracy: 0.7953, Dev Accuracy: 0.7491
Epoch 29/50, Train Loss: 1.1908, Train Accuracy: 0.8113, Dev Accuracy: 0.7638
Epoch 30/50, Train Loss: 0.7517, Train Accuracy: 0.7916, Dev Accuracy: 0.7601
Epoch 31/50, Train Loss: 0.4835, Train Accuracy: 0.8101, Dev Accuracy: 0.7638
Epoch 32/50, Train Loss: 0.9260, Train Accuracy: 0.8076, Dev Accuracy: 0.7786
Epoch 33/50, Train Loss: 1.1875, Train Accuracy: 0.8261, Dev Accuracy: 0.8007
Epoch 34/50, Train Loss: 0.9645, Train Accuracy: 0.8052, Dev Accuracy: 0.7786
Epoch 35/50, Train Loss: 0.9682, Train Accuracy: 0.7818, Dev Accuracy: 0.7380
Epoch 36/50, Train Loss: 0.6575, Train Accuracy: 0.8212, Dev Accuracy: 0.7823
Epoch 37/50, Train Loss: 1.0580, Train Accuracy: 0.8138, Dev Accuracy: 0.7749
Epoch 38/50, Train Loss: 0.4384, Train Accuracy: 0.8064, Dev Accuracy: 0.7860
Epoch 39/50, Train Loss: 1.2047, Train Accuracy: 0.8286, Dev Accuracy: 0.8007
Epoch 40/50, Train Loss: 0.2437, Train Accuracy: 0.7855, Dev Accuracy: 0.7565
Epoch 41/50, Train Loss: 0.4326, Train Accuracy: 0.8101, Dev Accuracy: 0.7786
Epoch 42/50, Train Loss: 0.3186, Train Accuracy: 0.8570, Dev Accuracy: 0.8339
Epoch 43/50, Train Loss: 0.7712, Train Accuracy: 0.7978, Dev Accuracy: 0.7897
Epoch 44/50, Train Loss: 0.3090, Train Accuracy: 0.8730, Dev Accuracy: 0.8635
Epoch 45/50, Train Loss: 1.0190, Train Accuracy: 0.8397, Dev Accuracy: 0.8118
Epoch 46/50, Train Loss: 0.3706, Train Accuracy: 0.8533, Dev Accuracy: 0.8339
Epoch 47/50, Train Loss: 0.3101, Train Accuracy: 0.8434, Dev Accuracy: 0.8303
Epoch 48/50, Train Loss: 0.4496, Train Accuracy: 0.8089, Dev Accuracy: 0.8081
Epoch 49/50, Train Loss: 0.6742, Train Accuracy: 0.8508, Dev Accuracy: 0.8561
Epoch 50/50, Train Loss: 0.3206, Train Accuracy: 0.8841, Dev Accuracy: 0.8745
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 60
1 0.89 0.95 0.92 60
2 0.54 0.72 0.62 18
3 0.89 0.74 0.81 23
4 0.50 0.44 0.47 27
5 0.97 0.94 0.96 83
accuracy 0.87 271
macro avg 0.80 0.80 0.80 271
weighted avg 0.88 0.87 0.87 271
F1-Score: 0.8748
Hidden layers configuration: [64, 32, 16, 8], Accuracy: 0.8745387453874539
Best model hidden layers configuration: Sequential(
(0): BatchNorm1d(9, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Linear(in_features=9, out_features=64, bias=True)
(2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ReLU()
(4): Linear(in_features=64, out_features=32, bias=True)
(5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(6): ReLU()
(7): Linear(in_features=32, out_features=16, bias=True)
(8): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(9): ReLU()
(10): Linear(in_features=16, out_features=8, bias=True)
(11): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(12): ReLU()
(13): Linear(in_features=8, out_features=1353, bias=True)
), Best accuracy: 0.8745387453874539
Test Accuracy: 0.8782287822878229
In [ ]:
evaluate_ann(ann_model_non_ac_classroom, *train_test_split(non_ac_classroom_x, non_ac_classroom_y, test_size=0.2, random_state=42))[1]
Out[ ]:
| ANN | ||
|---|---|---|
| Metrics | ||
| Train | Accuracy | 88.35 |
| F1 Macro | 78.96 | |
| F1 Weighted | 88.31 | |
| Recall Macro | 78.78 | |
| Recall Weighted | 88.35 | |
| Precision Macro | 80.25 | |
| Precision Weighted | 88.79 | |
| Test | Accuracy | 87.08 |
| F1 Macro | 80.34 | |
| F1 Weighted | 87.0 | |
| Recall Macro | 81.08 | |
| Recall Weighted | 87.08 | |
| Precision Macro | 81.0 | |
| Precision Weighted | 87.61 |
In [ ]:
metric_to_show = 'Accuracy'
result_non_ac_classroom_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_non_ac_classroom_hyper_tuned.columns.to_numpy())), result_non_ac_classroom_hyper_tuned.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
metric_to_show = 'Accuracy' # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted
fig, ax = plt.subplots(figsize=(10, 6))
# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
result_non_ac_classroom_hyper_tuned.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')
# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
result_non_ac_classroom.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(result_non_ac_classroom.columns.to_numpy())), result_non_ac_classroom.columns.to_numpy(), rotation=90)
plt.show();
In [ ]:
model_names, models = [], []
for model_name in evaluator_non_ac_classroom_hyper_tuned.model_names:
model_names.append(model_name)
models.append(evaluator_non_ac_classroom_hyper_tuned.models[model_name].named_steps['classifier'])
plot_feature_importances(models, model_names, df_tmp.columns.to_numpy()[:-1])
Objects ->
- evaluator_non_ac_classroom
- evaluator_non_ac_classroom_hyper_tuned
Results ->
- result_non_ac_classroom
- result_non_ac_classroom_hyper_tuned
Saving ML models¶
In [ ]:
with open("ac_lab_original_models_object_40_test.pkl", "wb") as file:
joblib.dump(evaluator_ac_lab, file)
with open("ac_lab_hypertuned_models_object_40_test.pkl", "wb") as file:
joblib.dump(evaluator_ac_lab_hyper_tuned, file)
with open("ac_classroom_original_models_object_40_test.pkl", "wb") as file:
joblib.dump(evaluator_ac_classroom, file)
with open("ac_classroom_hypertuned_models_object_40_test.pkl", "wb") as file:
joblib.dump(evaluator_ac_classroom_hyper_tuned, file)
with open("non_ac_classroom_original_models_object_40_test.pkl", "wb") as file:
joblib.dump(evaluator_non_ac_classroom, file)
with open("non_ac_classroom_hypertuned_models_object_40_test.pkl", "wb") as file:
joblib.dump(evaluator_non_ac_classroom_hyper_tuned, file)
In [ ]:
with open("ac_lab_ann_model_object_40_test.pkl", "wb") as file:
torch.save(ann_model_ac_lab, file)
with open("ac_classroom_ann_model_object_40_test.pkl", "wb") as file:
torch.save(ann_model_ac_classroom, file)
with open("non_ac_classroom_ann_model_object_40_test.pkl", "wb") as file:
torch.save(ann_model_non_ac_classroom, file)
In [ ]:
# checking
with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
tmp = joblib.load(file)
model_name = 'MLP Neural Net'
scores = tmp.get_metric_scores(model_name)
print(f'Metric Scores for Model {model_name}:')
for metric, score in scores.items():
print(f'{metric}: {score}')
Metric Scores for Model MLP Neural Net: Train Accuracy: 0.990558615263572 Train F1 Macro: 0.9763425165179429 Train F1 Weighted: 0.9904363609691609 Train Recall Macro: 0.9641764863338181 Train Recall Weighted: 0.990558615263572 Train Precision Macro: 0.990815235430536 Train Precision Weighted: 0.9906684296202051 Train Confusion Matrix: [[1203 7 0 0] [ 5 735 1 0] [ 0 0 506 0] [ 0 0 11 74]] Test Accuracy: 0.9746462264150944 Test F1 Macro: 0.948302223342338 Test F1 Weighted: 0.9744902650792115 Test Recall Macro: 0.9373216576195762 Test Recall Weighted: 0.9746462264150944 Test Precision Macro: 0.9612730972945318 Test Precision Weighted: 0.9746150649055128 Test Confusion Matrix: [[799 15 0 0] [ 9 451 3 0] [ 0 4 364 3] [ 0 0 9 39]]
In [ ]:
# checking
ob = torch.load("/content/ac_lab_ann_model_object_40_test.pkl")
evaluate_ann(ob, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]
Out[ ]:
| ANN | ||
|---|---|---|
| Metrics | ||
| Train | Accuracy | 88.12 |
| F1 Macro | 82.65 | |
| F1 Weighted | 87.59 | |
| Recall Macro | 80.43 | |
| Recall Weighted | 88.12 | |
| Precision Macro | 87.01 | |
| Precision Weighted | 88.5 | |
| Test | Accuracy | 87.38 |
| F1 Macro | 81.12 | |
| F1 Weighted | 86.96 | |
| Recall Macro | 78.81 | |
| Recall Weighted | 87.38 | |
| Precision Macro | 85.23 | |
| Precision Weighted | 87.7 |
Merging ANN with other models¶
AC LAB¶
In [ ]:
with open("/content/ac_lab_original_models_object_40_test.pkl", "rb") as file:
tmp1 = joblib.load(file)
tmp1 = evaluate_result(tmp1)
ob = torch.load("/content/ac_lab_ann_model_object_40_test.pkl")
tmp2 = evaluate_ann(ob, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]
tmp1.join(tmp2)
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ANN | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | |||||||||||||||
| Train | Accuracy | 75.18 | 97.29 | 83.32 | 73.25 | 100.0 | 100.0 | 99.92 | 99.84 | 100.0 | 86.31 | 42.25 | 56.45 | 33.4 | 88.12 |
| F1 Macro | 60.59 | 96.02 | 75.55 | 53.63 | 100.0 | 100.0 | 99.79 | 99.73 | 100.0 | 79.19 | 22.97 | 41.23 | 7.15 | 82.65 | |
| F1 Weighted | 72.13 | 97.28 | 82.42 | 69.2 | 100.0 | 100.0 | 99.92 | 99.84 | 100.0 | 85.64 | 31.42 | 50.4 | 16.72 | 87.59 | |
| Recall Macro | 60.89 | 95.45 | 72.76 | 56.39 | 100.0 | 100.0 | 99.79 | 99.88 | 100.0 | 77.41 | 33.63 | 50.38 | 14.29 | 80.43 | |
| Recall Weighted | 75.18 | 97.29 | 83.32 | 73.25 | 100.0 | 100.0 | 99.92 | 99.84 | 100.0 | 86.31 | 42.25 | 56.45 | 33.4 | 88.12 | |
| Precision Macro | 67.28 | 96.65 | 86.75 | 52.53 | 100.0 | 100.0 | 99.79 | 99.59 | 100.0 | 85.67 | 18.26 | 58.89 | 4.77 | 87.01 | |
| Precision Weighted | 71.66 | 97.29 | 84.91 | 66.94 | 100.0 | 100.0 | 99.92 | 99.84 | 100.0 | 87.01 | 26.59 | 64.75 | 11.15 | 88.5 | |
| Test | Accuracy | 76.65 | 93.99 | 83.67 | 73.94 | 97.29 | 98.41 | 98.17 | 97.7 | 97.88 | 86.5 | 43.51 | 57.49 | 34.91 | 87.38 |
| F1 Macro | 61.96 | 90.37 | 74.4 | 53.66 | 95.39 | 97.78 | 97.11 | 96.68 | 96.58 | 78.88 | 23.29 | 41.67 | 7.39 | 81.12 | |
| F1 Weighted | 73.78 | 93.96 | 82.78 | 70.18 | 97.29 | 98.41 | 98.17 | 97.7 | 97.87 | 85.87 | 33.21 | 52.43 | 18.06 | 86.96 | |
| Recall Macro | 61.53 | 89.84 | 71.37 | 55.85 | 95.3 | 97.68 | 96.87 | 96.45 | 96.0 | 77.28 | 33.61 | 51.34 | 14.29 | 78.81 | |
| Recall Weighted | 76.65 | 93.99 | 83.67 | 73.94 | 97.29 | 98.41 | 98.17 | 97.7 | 97.88 | 86.5 | 43.51 | 57.49 | 34.91 | 87.38 | |
| Precision Macro | 68.13 | 90.96 | 86.34 | 53.12 | 95.54 | 97.91 | 97.38 | 96.94 | 97.26 | 84.01 | 18.84 | 58.82 | 4.99 | 85.23 | |
| Precision Weighted | 72.93 | 93.96 | 85.18 | 68.18 | 97.32 | 98.42 | 98.19 | 97.72 | 97.89 | 86.93 | 28.76 | 66.36 | 12.18 | 87.7 |
In [ ]:
metric_to_show = 'Accuracy'
tmp1.join(tmp2).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
tmp3 = joblib.load(file)
tmp3 = evaluate_result(tmp3)
ob = torch.load("/content/ac_lab_ann_model_object_40_test.pkl")
tmp4 = evaluate_ann(ob, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]
tmp3.join(tmp4)
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ANN | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | |||||||||||||||
| Train | Accuracy | 77.62 | 100.0 | 95.83 | 86.15 | 100.0 | 99.57 | 99.92 | 99.84 | 100.0 | 97.25 | 69.79 | 65.38 | 70.02 | 88.12 |
| F1 Macro | 70.58 | 100.0 | 94.37 | 80.13 | 100.0 | 99.4 | 99.79 | 99.73 | 100.0 | 95.69 | 53.34 | 60.57 | 59.54 | 82.65 | |
| F1 Weighted | 75.69 | 100.0 | 95.82 | 85.09 | 100.0 | 99.57 | 99.92 | 99.84 | 100.0 | 97.23 | 66.48 | 65.16 | 68.01 | 87.59 | |
| Recall Macro | 70.03 | 100.0 | 93.44 | 77.99 | 100.0 | 99.34 | 99.79 | 99.88 | 100.0 | 94.71 | 55.56 | 64.78 | 63.49 | 80.43 | |
| Recall Weighted | 77.62 | 100.0 | 95.83 | 86.15 | 100.0 | 99.57 | 99.92 | 99.84 | 100.0 | 97.25 | 69.79 | 65.38 | 70.02 | 88.12 | |
| Precision Macro | 75.52 | 100.0 | 95.51 | 88.78 | 100.0 | 99.47 | 99.79 | 99.59 | 100.0 | 96.87 | 58.95 | 65.15 | 59.2 | 87.01 | |
| Precision Weighted | 76.01 | 100.0 | 95.89 | 86.75 | 100.0 | 99.57 | 99.92 | 99.84 | 100.0 | 97.29 | 67.37 | 68.9 | 67.77 | 88.5 | |
| Test | Accuracy | 78.24 | 96.52 | 93.93 | 86.32 | 96.82 | 97.88 | 98.23 | 97.88 | 97.82 | 94.93 | 71.17 | 64.45 | 69.99 | 87.38 |
| F1 Macro | 69.87 | 95.01 | 91.52 | 80.41 | 94.21 | 97.41 | 97.22 | 97.05 | 96.53 | 91.72 | 54.42 | 58.22 | 58.62 | 81.12 | |
| F1 Weighted | 76.42 | 96.52 | 93.96 | 85.46 | 96.79 | 97.88 | 98.23 | 97.88 | 97.81 | 94.88 | 68.36 | 64.65 | 68.33 | 86.96 | |
| Recall Macro | 68.92 | 95.14 | 91.13 | 78.23 | 93.37 | 97.45 | 96.96 | 96.88 | 95.93 | 90.65 | 55.72 | 63.22 | 63.06 | 78.81 | |
| Recall Weighted | 78.24 | 96.52 | 93.93 | 86.32 | 96.82 | 97.88 | 98.23 | 97.88 | 97.82 | 94.93 | 71.17 | 64.45 | 69.99 | 87.38 | |
| Precision Macro | 75.92 | 94.89 | 92.09 | 88.26 | 95.27 | 97.41 | 97.51 | 97.24 | 97.24 | 92.97 | 63.21 | 62.25 | 59.02 | 85.23 | |
| Precision Weighted | 77.03 | 96.53 | 94.05 | 87.04 | 96.81 | 97.91 | 98.24 | 97.89 | 97.84 | 94.9 | 70.74 | 68.73 | 68.68 | 87.7 |
In [ ]:
metric_to_show = 'Accuracy'
tmp3.join(tmp4).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp3.join(tmp4).columns.to_numpy())), tmp3.join(tmp4).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
metric_to_show = 'Accuracy' # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted
fig, ax = plt.subplots(figsize=(10, 6))
# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
tmp3.join(tmp4).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')
# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
tmp1.join(tmp2).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.show();
AC LAB FOR RESEARCH PAPER¶
In [ ]:
with open("/content/ac_lab_original_models_object_40_test.pkl", "rb") as file:
tmp1 = joblib.load(file)
tmp1 = evaluate_result(tmp1)
ob = torch.load("/content/ac_lab_ann_model_object_40_test.pkl")
tmp2 = evaluate_ann(ob, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]
qwe = tmp1.join(tmp2).loc['Test'].loc[['Accuracy', 'F1 Weighted', 'Recall Weighted', 'Precision Weighted']][
['KNN', 'SVM', 'DecisionTree', 'RandomForest', 'XGB', 'GradientBoosting', 'MLP Neural Net', 'ANN']].T
qwe['F1 Weighted'] = qwe['F1 Weighted'].map(lambda x: round(x / 100, 2))
qwe['Recall Weighted'] = qwe['Recall Weighted'].map(lambda x: round(x / 100, 2))
qwe['Precision Weighted'] = qwe['Precision Weighted'].map(lambda x: round(x / 100, 2))
qwe
Out[ ]:
| Metrics | Accuracy | F1 Weighted | Recall Weighted | Precision Weighted |
|---|---|---|---|---|
| KNN | 96.7 | 0.97 | 0.97 | 0.97 |
| SVM | 91.16 | 0.91 | 0.91 | 0.91 |
| DecisionTree | 98.11 | 0.98 | 0.98 | 0.98 |
| RandomForest | 98.82 | 0.99 | 0.99 | 0.99 |
| XGB | 99.23 | 0.99 | 0.99 | 0.99 |
| GradientBoosting | 98.58 | 0.99 | 0.99 | 0.99 |
| MLP Neural Net | 91.69 | 0.91 | 0.92 | 0.92 |
| ANN | 93.4 | 0.93 | 0.93 | 0.94 |
In [ ]:
with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
tmp3 = joblib.load(file)
tmp3 = evaluate_result(tmp3)
ob = torch.load("/content/ac_lab_ann_model_object_40_test.pkl")
tmp4 = evaluate_ann(ob, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]
qwe_h = tmp3.join(tmp4).loc['Test'].loc[['Accuracy', 'F1 Weighted', 'Recall Weighted', 'Precision Weighted']][['KNN', 'SVM', 'DecisionTree', 'RandomForest', 'XGB', 'GradientBoosting', 'MLP Neural Net', 'ANN']].T
qwe_h['F1 Weighted'] = qwe_h['F1 Weighted'].map(lambda x: round(x / 100, 2))
qwe_h['Recall Weighted'] = qwe_h['Recall Weighted'].map(lambda x: round(x / 100, 2))
qwe_h['Precision Weighted'] = qwe_h['Precision Weighted'].map(lambda x: round(x / 100, 2))
qwe_h
Out[ ]:
| Metrics | Accuracy | F1 Weighted | Recall Weighted | Precision Weighted |
|---|---|---|---|---|
| KNN | 97.76 | 0.98 | 0.98 | 0.98 |
| SVM | 96.99 | 0.97 | 0.97 | 0.97 |
| DecisionTree | 98.82 | 0.99 | 0.99 | 0.99 |
| RandomForest | 98.41 | 0.98 | 0.98 | 0.98 |
| XGB | 99.17 | 0.99 | 0.99 | 0.99 |
| GradientBoosting | 98.76 | 0.99 | 0.99 | 0.99 |
| MLP Neural Net | 97.46 | 0.97 | 0.97 | 0.97 |
| ANN | 93.4 | 0.93 | 0.93 | 0.94 |
In [ ]:
# Sample data
models = qwe.rename(index = {'RandomForest': 'RF', 'MLP Neural Net': 'MLP', 'GradientBoosting': 'GBM', 'DecisionTree': 'DT'}).index.tolist()
accuracy_nontuned = qwe['Accuracy']
accuracy_tuned = qwe_h['Accuracy']
# Set the figure size
fig, ax = plt.subplots(figsize=(10, 4))
bar_width = 0.35
index = np.arange(len(models))
# Plotting the bars
bars1 = ax.bar(index, accuracy_nontuned, width=bar_width, label='Non-Tuned')
bars2 = ax.bar(index + bar_width, accuracy_tuned, width=bar_width, label='Tuned')
# Annotate the bars with accuracy values
for bar, acc in zip(bars1, accuracy_nontuned):
height = bar.get_height()
ax.annotate(f'{int(acc)}%', xy=(bar.get_x() + bar.get_width() / 2, height),
xytext=(0, 3), # 3 points vertical offset
textcoords="offset points",
ha='center', va='bottom', fontsize=10.5)
for bar, acc in zip(bars2, accuracy_tuned):
height = bar.get_height()
ax.annotate(f'{int(acc)}%', xy=(bar.get_x() + bar.get_width() / 2, height),
xytext=(0, 3), # 3 points vertical offset
textcoords="offset points",
ha='center', va='bottom', fontsize=10.5)
# Set labels and title
ax.set_xlabel('Models', fontsize=15)
ax.set_ylabel('Accuracy (%)', fontsize=15)
# Rotate x-axis labels for better readability
ax.set_xticks(index + bar_width / 2)
ax.set_xticklabels(models, rotation=45, fontsize=15)
ax.tick_params(axis='y', which='both', labelsize=15)
# Add legend
ax.legend(loc='lower right')
plt.subplots_adjust(top=1.3)
# Show the plot
plt.show()
In [ ]:
with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
tmp3 = joblib.load(file)
est_mlp = lambda x: (x - np.min(x)) / (np.max(x) - np.min(x))
feature_importances_dt = tmp3.models['DecisionTree'].named_steps['classifier'].feature_importances_
feature_importances_rf = tmp3.models['RandomForest'].named_steps['classifier'].feature_importances_
feature_importances_xgb = tmp3.models['XGB'].named_steps['classifier'].feature_importances_
feature_importances_gb = tmp3.models['GradientBoosting'].named_steps['classifier'].feature_importances_
all_feature_importances = np.vstack((feature_importances_dt, feature_importances_rf,
feature_importances_xgb, feature_importances_gb))
mlp_feature_importances = est_mlp([np.sum(np.abs(layer), axis=1) for layer in [layer / np.linalg.norm(layer, ord=2, axis=0)
for layer in tmp3.models['MLP Neural Net'].named_steps['classifier'].coefs_]][0])
all_feature_importances = np.vstack((all_feature_importances, mlp_feature_importances))
fig, ax = plt.subplots(figsize=(8, 4))
# Plotting the bars
bars = ax.bar(df_tmp.columns[:-1].to_list(), np.median(all_feature_importances, axis=0))
ax.set_xlabel('Features', fontsize=15)
ax.set_ylabel('Median Weightage', fontsize=15)
plt.legend().set_visible(False)
ax.set_xticklabels(df_tmp.columns[:-1].to_list(), rotation=45, fontsize=12)
ax.tick_params(axis='y', which='both', labelsize=15)
# Annotate the bars with their values
for bar in bars:
height = bar.get_height()
ax.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2, height),
xytext=(0, 3), textcoords="offset points",
ha='center', va='bottom', fontsize=10)
# Show the plot
plt.subplots_adjust(top=1.2)
plt.show()
WARNING:matplotlib.legend:No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
In [ ]:
def calculate_class_accuracy(obj = None, confusion_matrix_ann = None):
each_model_each_class_accuracy, res = {}, None
if obj is not None:
for model in obj.model_names:
confusion_matrix = obj.get_metric_scores(model)['Test Confusion Matrix']
num_classes, class_accuracies = len(confusion_matrix), {}
for i in range(num_classes):
TP = confusion_matrix[i, i]
FP = sum(confusion_matrix[:, i]) - TP
FN = sum(confusion_matrix[i, :]) - TP
TN = np.sum(confusion_matrix) - TP - FP - FN
total_samples = TP + TN + FP + FN
accuracy = (TP + TN) / total_samples
class_accuracies[f'Class {i + 1}'] = {'Accuracy (%)': round(accuracy * 100.00, 2),
'Total Samples': total_samples,
'Total Correct Samples Predicted': TP + TN}
each_model_each_class_accuracy[model] = class_accuracies
if confusion_matrix_ann is not None:
num_classes, class_accuracies = len(confusion_matrix_ann), {}
for i in range(num_classes):
TP = confusion_matrix_ann[i, i]
FP = sum(confusion_matrix_ann[:, i]) - TP
FN = sum(confusion_matrix_ann[i, :]) - TP
TN = np.sum(confusion_matrix_ann) - TP - FP - FN
total_samples = TP + TN + FP + FN
accuracy = (TP + TN) / total_samples
class_accuracies[f'Class {i}'] = {'Accuracy (%)': round(accuracy * 100.00, 2),
'Total Samples': total_samples,
'Total Correct Samples Predicted': TP + TN}
each_model_each_class_accuracy['ANN'] = class_accuracies
for model_name in each_model_each_class_accuracy:
tmp = pd.DataFrame(each_model_each_class_accuracy[model_name]).T.stack(0).reset_index().rename(
columns = {'level_0': 'Class', 'level_1': 'Attributes', 0: model_name}).set_index(
['Class', 'Attributes'], drop = True)
if res is None:
res = tmp
else:
res = res.join(tmp)
return res
with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
tmp1 = joblib.load(file)
tmp2 = evaluate_ann(torch.load("/content/ac_lab_ann_model_object_40_test.pkl"),
*train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[0]['Test Confusion Matrix']
class_accuracy_df = calculate_class_accuracy(tmp1, tmp2).query("Attributes == 'Accuracy (%)'")
data_to_plot = class_accuracy_df[['XGB', 'DecisionTree', 'RandomForest', 'GradientBoosting', 'MLP Neural Net']].rename(columns = {'DecisionTree': 'DT', 'RandomForest': 'RF', 'GradientBoosting': 'GBM', 'MLP Neural Net': 'MLP'})
ax = data_to_plot.plot(marker='o', figsize=(10, 6))
for model in data_to_plot.columns:
for index, value in enumerate(data_to_plot[model]):
ax.annotate(f'{value:.2f}%', (index, value), textcoords="offset points", xytext=(0, 5), ha='center', fontsize=11)
new_labels = data_to_plot.index.get_level_values(0).to_list()
ax.set_xticks(range(len(new_labels)))
ax.set_xticklabels(new_labels, fontsize=18)
ax.set_xlabel('Occupancy Levels', fontsize=18)
ax.set_ylabel('Accuracy (%)', fontsize=18)
ax.tick_params(axis='y', which='both', labelsize=18);
ax.legend(fontsize='large')
plt.show()
In [ ]:
def calculate_class_accuracy(obj = None, confusion_matrix_ann = None):
each_model_each_class_accuracy = {}
res = None
if obj is not None:
confusion_matrix = None
c = 0
for model in ['XGB', 'DecisionTree', 'RandomForest', 'GradientBoosting', 'MLP Neural Net']:
if confusion_matrix is not None:
confusion_matrix = np.add(confusion_matrix, obj.get_metric_scores(model)['Test Confusion Matrix'])
else:
confusion_matrix = obj.get_metric_scores(model)['Test Confusion Matrix']
c += 1
if confusion_matrix_ann is not None:
confusion_matrix = np.add(confusion_matrix, confusion_matrix_ann)
c += 1
return (confusion_matrix // c)
with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
tmp1 = joblib.load(file)
tmp2 = evaluate_ann(torch.load("/content/ac_lab_ann_model_object_40_test.pkl"),
*train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[0]['Test Confusion Matrix']
all_value_count = Counter(train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42)[3])
total = sum(all_value_count.values())
cm = calculate_class_accuracy(tmp1, tmp2)
fig, ax = plt.subplots()
ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
# ... and label them with the respective list entries
xticklabels=[f"Class {i}" for i in range(1, cm.shape[1] + 1)], yticklabels=[f"Class {i}\n ({((all_value_count[i - 1] / total) * 100.00):.2f} %)" for i in range(1, cm.shape[0] + 1)],
ylabel='True label',
xlabel='Predicted label')
fmt = 'd'
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig.tight_layout()
plt.xlim(-0.5, len(np.unique(ac_lab_y))-0.5)
plt.ylim(len(np.unique(ac_lab_y))-0.5, -0.5)
np.set_printoptions(precision=2)
plt.show();
AC CLASSROOM¶
In [ ]:
with open("/content/ac_classroom_original_models_object_40_test.pkl", "rb") as file:
tmp1 = joblib.load(file)
tmp1 = evaluate_result(tmp1)
ob = torch.load("/content/ac_classroom_ann_model_object_40_test.pkl")
tmp2 = evaluate_ann(ob, *train_test_split(ac_classroom_x, ac_classroom_y, test_size=0.4, random_state=42))[1]
tmp1.join(tmp2)
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ANN | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | |||||||||||||||
| Train | Accuracy | 72.55 | 96.28 | 81.6 | 67.67 | 99.8 | 99.8 | 99.6 | 99.5 | 99.5 | 82.45 | 51.63 | 33.69 | 2.87 | 81.8 |
| F1 Macro | 64.14 | 95.59 | 72.96 | 48.36 | 99.73 | 99.73 | 99.6 | 99.43 | 99.27 | 72.35 | 39.43 | 31.88 | 0.93 | 72.38 | |
| F1 Weighted | 70.28 | 96.27 | 79.18 | 64.7 | 99.8 | 99.8 | 99.6 | 99.5 | 99.49 | 79.88 | 40.88 | 22.22 | 0.16 | 80.58 | |
| Recall Macro | 63.54 | 95.43 | 73.61 | 50.68 | 99.77 | 99.56 | 99.45 | 99.28 | 98.95 | 72.76 | 41.17 | 49.44 | 16.67 | 74.18 | |
| Recall Weighted | 72.55 | 96.28 | 81.6 | 67.67 | 99.8 | 99.8 | 99.6 | 99.5 | 99.5 | 82.45 | 51.63 | 33.69 | 2.87 | 81.8 | |
| Precision Macro | 66.48 | 95.76 | 88.99 | 62.64 | 99.69 | 99.9 | 99.75 | 99.58 | 99.61 | 72.5 | 53.73 | 45.86 | 0.48 | 78.85 | |
| Precision Weighted | 68.57 | 96.27 | 83.33 | 65.35 | 99.8 | 99.8 | 99.6 | 99.5 | 99.5 | 77.74 | 46.8 | 61.21 | 0.08 | 82.45 | |
| Test | Accuracy | 73.1 | 93.75 | 83.35 | 69.86 | 97.66 | 97.59 | 97.51 | 97.59 | 97.81 | 84.4 | 50.26 | 34.06 | 2.19 | 83.27 |
| F1 Macro | 63.72 | 91.0 | 73.73 | 50.12 | 95.8 | 95.58 | 96.18 | 96.32 | 96.37 | 73.03 | 38.82 | 31.03 | 0.71 | 71.13 | |
| F1 Weighted | 71.31 | 93.78 | 81.47 | 67.45 | 97.71 | 97.58 | 97.52 | 97.61 | 97.82 | 82.33 | 39.26 | 22.91 | 0.09 | 82.46 | |
| Recall Macro | 62.02 | 90.87 | 73.63 | 50.75 | 96.99 | 95.05 | 96.52 | 96.55 | 96.23 | 72.65 | 40.38 | 48.55 | 16.67 | 73.79 | |
| Recall Weighted | 73.1 | 93.75 | 83.35 | 69.86 | 97.66 | 97.59 | 97.51 | 97.59 | 97.81 | 84.4 | 50.26 | 34.06 | 2.19 | 83.27 | |
| Precision Macro | 67.42 | 91.22 | 90.05 | 64.89 | 94.84 | 96.13 | 95.88 | 96.13 | 96.52 | 73.93 | 54.71 | 46.04 | 0.36 | 79.1 | |
| Precision Weighted | 70.03 | 93.86 | 84.53 | 67.62 | 97.83 | 97.58 | 97.55 | 97.65 | 97.85 | 80.49 | 46.79 | 64.49 | 0.05 | 84.73 |
In [ ]:
metric_to_show = 'Accuracy'
tmp1.join(tmp2).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
with open("/content/ac_classroom_hypertuned_models_object_40_test.pkl", "rb") as file:
tmp3 = joblib.load(file)
tmp3 = evaluate_result(tmp3)
ob = torch.load("/content/ac_classroom_ann_model_object_40_test.pkl")
tmp4 = evaluate_ann(ob, *train_test_split(ac_classroom_x, ac_classroom_y, test_size=0.4, random_state=42))[1]
tmp3.join(tmp4)
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ANN | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | |||||||||||||||
| Train | Accuracy | 75.87 | 97.23 | 94.87 | 80.19 | 99.04 | 99.35 | 99.6 | 99.3 | 99.8 | 98.54 | 52.99 | 54.4 | 72.75 | 81.8 |
| F1 Macro | 72.98 | 96.43 | 92.7 | 78.53 | 98.47 | 99.09 | 99.6 | 99.1 | 99.73 | 98.15 | 39.83 | 52.32 | 71.13 | 72.38 | |
| F1 Weighted | 74.38 | 97.22 | 94.69 | 79.81 | 99.05 | 99.34 | 99.6 | 99.29 | 99.8 | 98.54 | 48.06 | 49.99 | 73.09 | 80.58 | |
| Recall Macro | 72.29 | 96.27 | 91.5 | 77.61 | 98.37 | 98.79 | 99.45 | 98.95 | 99.66 | 98.11 | 42.47 | 57.79 | 72.7 | 74.18 | |
| Recall Weighted | 75.87 | 97.23 | 94.87 | 80.19 | 99.04 | 99.35 | 99.6 | 99.3 | 99.8 | 98.54 | 52.99 | 54.4 | 72.75 | 81.8 | |
| Precision Macro | 84.69 | 96.61 | 94.64 | 80.14 | 98.57 | 99.4 | 99.75 | 99.25 | 99.8 | 98.19 | 46.55 | 56.94 | 72.67 | 78.85 | |
| Precision Weighted | 77.22 | 97.22 | 94.82 | 79.73 | 99.05 | 99.34 | 99.6 | 99.29 | 99.8 | 98.55 | 54.57 | 58.54 | 77.45 | 82.45 | |
| Test | Accuracy | 75.96 | 94.72 | 94.8 | 79.5 | 97.06 | 97.59 | 97.59 | 97.51 | 97.51 | 96.01 | 56.52 | 57.27 | 76.79 | 83.27 |
| F1 Macro | 73.3 | 91.94 | 92.45 | 75.84 | 95.02 | 96.06 | 96.26 | 96.07 | 95.86 | 94.25 | 42.46 | 53.0 | 74.12 | 71.13 | |
| F1 Weighted | 74.89 | 94.75 | 94.73 | 79.38 | 97.11 | 97.58 | 97.61 | 97.52 | 97.54 | 96.04 | 52.01 | 54.01 | 77.68 | 82.46 | |
| Recall Macro | 71.63 | 92.23 | 90.96 | 74.18 | 95.17 | 95.26 | 96.77 | 95.75 | 96.07 | 94.66 | 44.66 | 58.64 | 75.93 | 73.79 | |
| Recall Weighted | 75.96 | 94.72 | 94.8 | 79.5 | 97.06 | 97.59 | 97.59 | 97.51 | 97.51 | 96.01 | 56.52 | 57.27 | 76.79 | 83.27 | |
| Precision Macro | 84.57 | 91.74 | 94.26 | 77.79 | 95.06 | 96.91 | 95.8 | 96.41 | 95.7 | 93.92 | 50.62 | 57.86 | 75.49 | 79.1 | |
| Precision Weighted | 76.83 | 94.82 | 94.77 | 79.44 | 97.22 | 97.59 | 97.64 | 97.54 | 97.59 | 96.11 | 59.99 | 64.08 | 82.63 | 84.73 |
In [ ]:
metric_to_show = 'Accuracy'
tmp3.join(tmp4).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp3.join(tmp4).columns.to_numpy())), tmp3.join(tmp4).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
metric_to_show = 'Accuracy' # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted
fig, ax = plt.subplots(figsize=(10, 6))
# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
tmp3.join(tmp4).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')
# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
tmp1.join(tmp2).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.show();
NON AC CLASSROOM¶
In [ ]:
with open("/content/non_ac_classroom_original_models_object_40_test.pkl", "rb") as file:
tmp1 = joblib.load(file)
tmp1 = evaluate_result(tmp1)
ob = torch.load("/content/non_ac_classroom_ann_model_object_40_test.pkl")
tmp2 = evaluate_ann(ob, *train_test_split(non_ac_classroom_x, non_ac_classroom_y, test_size=0.4, random_state=42))[1]
tmp1.join(tmp2)
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ANN | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | |||||||||||||||
| Train | Accuracy | 77.07 | 97.53 | 82.12 | 71.89 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 77.07 | 69.54 | 81.13 | 72.38 | 88.41 |
| F1 Macro | 55.99 | 96.07 | 68.99 | 41.59 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 53.1 | 63.58 | 75.12 | 56.02 | 79.12 | |
| F1 Weighted | 72.66 | 97.54 | 80.4 | 64.24 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 71.31 | 70.84 | 82.33 | 67.14 | 88.35 | |
| Recall Macro | 58.56 | 96.28 | 71.11 | 47.77 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 58.03 | 63.04 | 78.52 | 58.6 | 79.05 | |
| Recall Weighted | 77.07 | 97.53 | 82.12 | 71.89 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 77.07 | 69.54 | 81.13 | 72.38 | 88.41 | |
| Precision Macro | 60.52 | 95.87 | 72.52 | 54.42 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 49.25 | 71.41 | 78.15 | 65.99 | 80.14 | |
| Precision Weighted | 72.39 | 97.55 | 81.89 | 68.01 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 66.75 | 78.37 | 88.87 | 74.67 | 88.74 | |
| Test | Accuracy | 76.2 | 96.31 | 79.34 | 71.59 | 96.13 | 97.23 | 97.6 | 97.6 | 96.31 | 75.83 | 68.27 | 82.29 | 71.03 | 87.64 |
| F1 Macro | 55.9 | 94.21 | 66.44 | 42.59 | 93.8 | 95.39 | 96.32 | 96.12 | 94.49 | 53.3 | 61.31 | 77.98 | 57.81 | 79.59 | |
| F1 Weighted | 70.65 | 96.23 | 77.14 | 62.6 | 96.12 | 97.22 | 97.61 | 97.6 | 96.31 | 69.19 | 68.99 | 83.54 | 66.13 | 87.61 | |
| Recall Macro | 59.84 | 93.87 | 68.1 | 48.84 | 93.85 | 95.13 | 96.08 | 95.78 | 94.53 | 59.38 | 59.77 | 80.44 | 60.95 | 79.67 | |
| Recall Weighted | 76.2 | 96.31 | 79.34 | 71.59 | 96.13 | 97.23 | 97.6 | 97.6 | 96.31 | 75.83 | 68.27 | 82.29 | 71.03 | 87.64 | |
| Precision Macro | 61.06 | 94.76 | 69.4 | 54.63 | 93.78 | 95.66 | 96.6 | 96.48 | 94.48 | 48.88 | 68.43 | 80.5 | 65.16 | 80.92 | |
| Precision Weighted | 71.09 | 96.29 | 77.85 | 64.59 | 96.12 | 97.21 | 97.63 | 97.61 | 96.32 | 64.09 | 73.85 | 89.17 | 73.0 | 88.27 |
In [ ]:
metric_to_show = 'Accuracy'
tmp1.join(tmp2).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
with open("/content/non_ac_classroom_hypertuned_models_object_40_test.pkl", "rb") as file:
tmp3 = joblib.load(file)
tmp3 = evaluate_result(tmp3)
ob = torch.load("/content/non_ac_classroom_ann_model_object_40_test.pkl")
tmp4 = evaluate_ann(ob, *train_test_split(non_ac_classroom_x, non_ac_classroom_y, test_size=0.4, random_state=42))[1]
tmp3.join(tmp4)
Out[ ]:
| LogisticRegression | KNN | SVM | Linear SVM | DecisionTree | RandomForest | XGB | LGBM | GradientBoosting | MLP Neural Net | AdaBoost | Naive Bayes | QDA | ANN | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Metrics | |||||||||||||||
| Train | Accuracy | 83.85 | 100.0 | 97.78 | 84.96 | 99.51 | 100.0 | 100.0 | 100.0 | 100.0 | 99.51 | 67.45 | 83.48 | 75.59 | 88.41 |
| F1 Macro | 71.88 | 100.0 | 95.99 | 71.27 | 99.13 | 100.0 | 100.0 | 100.0 | 100.0 | 98.89 | 55.33 | 77.79 | 59.82 | 79.12 | |
| F1 Weighted | 82.38 | 100.0 | 97.78 | 82.6 | 99.51 | 100.0 | 100.0 | 100.0 | 100.0 | 99.51 | 66.42 | 84.76 | 73.27 | 88.35 | |
| Recall Macro | 70.01 | 100.0 | 95.95 | 71.08 | 99.01 | 100.0 | 100.0 | 100.0 | 100.0 | 98.95 | 52.83 | 81.19 | 65.3 | 79.05 | |
| Recall Weighted | 83.85 | 100.0 | 97.78 | 84.96 | 99.51 | 100.0 | 100.0 | 100.0 | 100.0 | 99.51 | 67.45 | 83.48 | 75.59 | 88.41 | |
| Precision Macro | 78.5 | 100.0 | 96.05 | 87.0 | 99.28 | 100.0 | 100.0 | 100.0 | 100.0 | 98.87 | 63.59 | 79.05 | 61.57 | 80.14 | |
| Precision Weighted | 83.23 | 100.0 | 97.79 | 86.97 | 99.52 | 100.0 | 100.0 | 100.0 | 100.0 | 99.52 | 71.07 | 89.74 | 77.17 | 88.74 | |
| Test | Accuracy | 80.81 | 95.2 | 95.57 | 83.39 | 96.13 | 97.42 | 97.23 | 97.6 | 96.49 | 96.86 | 64.76 | 84.5 | 73.8 | 87.64 |
| F1 Macro | 68.97 | 92.3 | 92.94 | 69.71 | 93.97 | 95.75 | 95.57 | 95.98 | 94.72 | 94.64 | 54.23 | 79.72 | 59.46 | 79.59 | |
| F1 Weighted | 79.11 | 95.16 | 95.51 | 80.63 | 96.11 | 97.41 | 97.23 | 97.58 | 96.49 | 96.86 | 63.45 | 85.75 | 71.23 | 87.61 | |
| Recall Macro | 69.02 | 92.08 | 92.32 | 70.58 | 93.72 | 95.45 | 95.45 | 95.69 | 94.62 | 94.36 | 52.03 | 82.21 | 64.66 | 79.67 | |
| Recall Weighted | 80.81 | 95.2 | 95.57 | 83.39 | 96.13 | 97.42 | 97.23 | 97.6 | 96.49 | 96.86 | 64.76 | 84.5 | 73.8 | 87.64 | |
| Precision Macro | 74.75 | 92.54 | 93.69 | 84.74 | 94.48 | 96.07 | 95.71 | 96.31 | 94.82 | 95.05 | 62.36 | 80.62 | 60.39 | 80.92 | |
| Precision Weighted | 80.73 | 95.13 | 95.53 | 86.05 | 96.23 | 97.41 | 97.23 | 97.58 | 96.48 | 96.91 | 68.05 | 89.78 | 74.4 | 88.27 |
In [ ]:
metric_to_show = 'Accuracy'
tmp3.join(tmp4).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp3.join(tmp4).columns.to_numpy())), tmp3.join(tmp4).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
In [ ]:
metric_to_show = 'Accuracy' # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted
fig, ax = plt.subplots(figsize=(10, 6))
# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
tmp3.join(tmp4).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')
# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
tmp1.join(tmp2).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.show();
Each Class Accuracy, Precision, Recall, F1 Score of Prediction¶
In [ ]:
def calculate_class_metrics(obj = None, confusion_matrix_ann = None):
each_model_each_class_metrics = {}
res = None
epsilon = 1e-7 # small constant
if obj is not None:
for model in obj.model_names:
confusion_matrix = obj.get_metric_scores(model)['Test Confusion Matrix']
num_classes = len(confusion_matrix)
class_metrics = {}
for i in range(num_classes):
TP = confusion_matrix[i, i]
FP = sum(confusion_matrix[:, i]) - TP
FN = sum(confusion_matrix[i, :]) - TP
total_samples = TP + FP + FN
accuracy = TP / (total_samples + epsilon)
precision = TP / (TP + FP + epsilon)
recall = TP / (TP + FN + epsilon)
f1_score = 2 * (precision * recall) / (precision + recall + epsilon)
class_metrics[f'Class {i}'] = {'Accuracy (%)': round(accuracy * 100.00, 2), 'Precision (%)': round(precision * 100.00, 2), 'Recall (%)': round(recall * 100.00, 2), 'F1 Score (%)': round(f1_score * 100.00, 2)}
each_model_each_class_metrics[model] = class_metrics
if confusion_matrix_ann is not None:
num_classes = len(confusion_matrix_ann)
class_metrics = {}
for i in range(num_classes):
TP = confusion_matrix_ann[i, i]
FP = sum(confusion_matrix_ann[:, i]) - TP
FN = sum(confusion_matrix_ann[i, :]) - TP
total_samples = TP + FP + FN
accuracy = TP / (total_samples + epsilon)
precision = TP / (TP + FP + epsilon)
recall = TP / (TP + FN + epsilon)
f1_score = 2 * (precision * recall) / (precision + recall + epsilon)
class_metrics[f'Class {i}'] = {'Accuracy (%)': round(accuracy * 100.00, 2), 'Precision (%)': round(precision * 100.00, 2), 'Recall (%)': round(recall * 100.00, 2), 'F1 Score (%)': round(f1_score * 100.00, 2)}
each_model_each_class_metrics['ANN'] = class_metrics
for model_name in each_model_each_class_metrics:
tmp = pd.DataFrame(each_model_each_class_metrics[model_name]).T.stack(0).reset_index().rename(columns = {'level_0': 'Class', 'level_1': 'Attributes', 0: model_name}).set_index(['Class', 'Attributes'], drop = True)
if res is None:
res = tmp
else:
res = res.join(tmp, how='outer')
return res
In [ ]:
with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
tmp1 = joblib.load(file)
tmp2 = evaluate_ann(torch.load("/content/ac_lab_ann_model_object_40_test.pkl"),
*train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[0]['Test Confusion Matrix']
calculate_class_metrics(tmp1, tmp2)
In [ ]:
with open("/content/ac_classroom_hypertuned_models_object_40_test.pkl", "rb") as file:
tmp1 = joblib.load(file)
tmp2 = evaluate_ann(torch.load("/content/ac_classroom_ann_model_object_40_test.pkl"),
*train_test_split(ac_classroom_x, ac_classroom_y, test_size=0.4, random_state=42))[0]['Test Confusion Matrix']
calculate_class_metrics(tmp1, tmp2)
# calculate_class_accuracy(tmp1, tmp2).query("Attributes == 'Accuracy (%)'")
In [ ]:
with open("/content/non_ac_classroom_hypertuned_models_object_40_test.pkl", "rb") as file:
tmp1 = joblib.load(file)
tmp2 = evaluate_ann(torch.load("/content/non_ac_classroom_ann_model_object_40_test.pkl"),
*train_test_split(non_ac_classroom_x, non_ac_classroom_y, test_size=0.4, random_state=42))[0]['Test Confusion Matrix']
calculate_class_metrics(tmp1, tmp2)
# calculate_class_accuracy(tmp1, tmp2).query("Attributes == 'Accuracy (%)'")